We are going to use generalized additive models (GAMs) to estimate time-varying coefficients for climate variables in order to quantify their effects on malaria incidence by specie thought the time period of the study.

First, lets load some packages to handle the tasks we are going to do.

library(htmltools)

# Utilities
library(tictoc)

# Data loading
library(readr)
library(skimr)

# Data manipulation
library(tibble)
library(lubridate)
library(dplyr)
library(splitTools)
library(scales)
library(DT)
library(stringr)

# Time series
library(forecast)

# Plots
library(ggplot2)
ggplot2::theme_set(theme_bw())
ggplot2::theme_update(panel.grid = element_blank())
library(ggsci)
library(GGally)
library(DHARMa)

# GAM
library(mgcv)
library(gratia)

Data wrangling

Loading

The cleaned up data can be found on the ~/data/processed/ directory on this repository. Define the file path to read the data.

processed_data_path <- file.path("../data/processed/") 
file_name           <- "pro_malaria-monthly-cases-district-loreto"
file                <- paste(file_name, ".csv", sep = "")
file_path           <- file.path(processed_data_path, file) 

Create a variable containing the column names for the data.

col_names <- c(
  "district",
  "year",
  "month",
  "falciparum",
  "vivax",
  "aet",
  "prcp",
  "q",
  "soilm",
  "tmax",
  "tmin",
  "pop2015",
  "province",
  "region",
  "dttm",
  "time"
)

Define column types to optimize the parsing. We exclude soilm and region for the analysis.

# Column types
col_types <- 
  readr::cols_only(
    district      = col_character(),
    year          = col_integer(),
    month         = col_integer(),
    falciparum    = col_integer(),
    vivax         = col_integer(),
    aet           = col_double(),
    prcp          = col_double(),
    q             = col_double(),
    tmax          = col_double(),
    tmin          = col_double(),
    pop2015       = col_integer(),
    province      = col_character(),
    dttm          = col_datetime(),
    time          = col_integer()
  )

Reading csv. file into a data frame.

df_malaria_loreto <- 
  readr::read_csv(
    file      = file_path,
    col_names = col_names,
    col_types = col_types,
    skip      = 1,
    locale    = readr::locale(encoding = "UTF-8")
  )

Parsing

We are going to parse district, province, year and month as factors.

to_factor <- c("year", "month", "district", "province")

for (col in to_factor) {
  df_malaria_loreto[[col]] <- factor(df_malaria_loreto[[col]])
}

Validation

Check data structure.

str(df_malaria_loreto)
## tibble [10,584 x 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ district  : Factor w/ 49 levels "ALTO NANAY","ALTO TAPICHE",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ year      : Factor w/ 18 levels "2000","2001",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ month     : Factor w/ 12 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ falciparum: int [1:10584] 2 2 2 0 3 1 2 11 6 12 ...
##  $ vivax     : int [1:10584] 42 40 21 3 15 0 4 1 2 4 ...
##  $ aet       : num [1:10584] 90.8 82.1 66.8 61.8 70 ...
##  $ prcp      : num [1:10584] 227 278 358 334 380 ...
##  $ q         : num [1:10584] 136 196 291 272 310 ...
##  $ tmax      : num [1:10584] 31 31 30.4 30 30 ...
##  $ tmin      : num [1:10584] 21.7 21.5 21.3 20.8 21.3 ...
##  $ pop2015   : int [1:10584] 2784 2784 2784 2784 2784 2784 2784 2784 2784 2784 ...
##  $ province  : Factor w/ 8 levels "ALTO AMAZONAS",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ dttm      : POSIXct[1:10584], format: "2000-01-01" "2000-02-01" ...
##  $ time      : int [1:10584] 946684800 949363200 951868800 954547200 957139200 959817600 962409600 965088000 967766400 970358400 ...
##  - attr(*, "spec")=
##   .. cols_only(
##   ..   district = col_character(),
##   ..   year = col_integer(),
##   ..   month = col_integer(),
##   ..   falciparum = col_integer(),
##   ..   vivax = col_integer(),
##   ..   aet = col_double(),
##   ..   prcp = col_double(),
##   ..   q = col_double(),
##   ..   soilm = col_skip(),
##   ..   tmax = col_double(),
##   ..   tmin = col_double(),
##   ..   pop2015 = col_integer(),
##   ..   province = col_character(),
##   ..   region = col_skip(),
##   ..   dttm = col_datetime(format = ""),
##   ..   time = col_integer()
##   .. )

To display a summary of the data, we first define a variable containing a table with summary statistics for each data type in the data frame using the skim function in the skimr package.

skim_malaria <- skimr::skim(df_malaria_loreto)

Then, we display the summary tables for each data type using the yank function of the same package:

  • Factors:
skimr::yank(skim_malaria, "factor")

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
district 0 1 FALSE 49 ALT: 216, ALT: 216, BAL: 216, BAR: 216
year 0 1 FALSE 18 200: 588, 200: 588, 200: 588, 200: 588
month 0 1 FALSE 12 1: 882, 2: 882, 3: 882, 4: 882
province 0 1 FALSE 8 MAY: 2376, REQ: 2376, ALT: 1296, UCA: 1296
  • Numeric:
skimr::yank(skim_malaria, "numeric")

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
falciparum 0 1 1.576000e+01 50.88 0.00 0.000000e+00 0.000000e+00 1.000000e+01 1.240000e+03 ▇▁▁▁▁
vivax 0 1 5.018000e+01 121.77 0.00 0.000000e+00 8.000000e+00 4.400000e+01 1.936000e+03 ▇▁▁▁▁
aet 0 1 8.314000e+01 11.78 42.22 7.434000e+01 8.204000e+01 9.163000e+01 1.183400e+02 ▁▃▇▅▁
prcp 0 1 2.229800e+02 112.00 14.13 1.445600e+02 2.041500e+02 2.807300e+02 8.110900e+02 ▆▇▂▁▁
q 0 1 1.398400e+02 113.94 0.71 5.249000e+01 1.195000e+02 1.990600e+02 7.390900e+02 ▇▅▁▁▁
tmax 0 1 3.143000e+01 1.12 26.90 3.070000e+01 3.144000e+01 3.217000e+01 3.514000e+01 ▁▂▇▅▁
tmin 0 1 2.141000e+01 0.83 17.78 2.092000e+01 2.151000e+01 2.195000e+01 2.501000e+01 ▁▂▇▂▁
pop2015 0 1 2.121167e+04 32481.69 690.00 6.017000e+03 1.074500e+04 1.706100e+04 1.546960e+05 ▇▁▁▁▁
time 0 1 1.229362e+09 163983470.51 946684800.00 1.087992e+09 1.229429e+09 1.370693e+09 1.512086e+09 ▇▇▇▇▇
  • Datetime:
skimr::yank(skim_malaria, "POSIXct")

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
dttm 0 1 2000-01-01 2017-12-01 2008-12-16 12:00:00 216

Feature engineering

Create a copy of the data frame where we are going to transform and create some new columns.

dat <- tibble::tibble(df_malaria_loreto)

Scaling

First, we are going to scale all climate variables from 0 to 1. The aim of this step is to avoid numerical problems in the GAM fitting procedures, and also to have a common scale on the coefficients.

to_scale <- c("aet", "prcp", "q", "soilm", "tmax", "tmin")

for (col in to_scale) {
  dat[[col]] <- scales::rescale(df_malaria_loreto[[col]], to = c(0, 1))
}

Lagged variables

Lagged variables are going to be used to build distributed lag models (DLM). In order to properly create lagged variables for this data set, first we are going to group the data by districts. Next, we arrange the data in each group by date. Finally, la lag function of the package dplyr is applied to create lagged variables by 1, 3, 6, and 12 months.

dat <- dat %>% 
  dplyr::group_by(district) %>%
  dplyr::arrange(dttm) %>% 
  dplyr::mutate(
    aet_lag1    = dplyr::lag(aet,  n = 1L),
    aet_lag3    = dplyr::lag(aet,  n = 3L),
    aet_lag6    = dplyr::lag(aet,  n = 6L),
    aet_lag12   = dplyr::lag(aet,  n = 12L),
    prcp_lag1   = dplyr::lag(prcp, n = 1L),
    prcp_lag3   = dplyr::lag(prcp, n = 3L),
    prcp_lag6   = dplyr::lag(prcp, n = 6L),
    prcp_lag12  = dplyr::lag(prcp, n = 12L),
    q_lag1      = dplyr::lag(q,    n = 1L),
    q_lag3      = dplyr::lag(q,    n = 3L),
    q_lag6      = dplyr::lag(q,    n = 6L),
    q_lag12     = dplyr::lag(q,    n = 12L),
    soilm_lag1  = dplyr::lag(q,    n = 1L),
    soilm_lag3  = dplyr::lag(q,    n = 3L),
    soilm_lag6  = dplyr::lag(q,    n = 6L),
    soilm_lag12 = dplyr::lag(q,    n = 12L),
    tmax_lag1   = dplyr::lag(tmax, n = 1L),
    tmax_lag3   = dplyr::lag(tmax, n = 3L),
    tmax_lag6   = dplyr::lag(tmax, n = 6L),
    tmax_lag12  = dplyr::lag(tmax, n = 12L),
    tmin_lag1   = dplyr::lag(tmin, n = 1L),
    tmin_lag3   = dplyr::lag(tmin, n = 3L),
    tmin_lag6   = dplyr::lag(tmin, n = 6L),
    tmin_lag12  = dplyr::lag(tmin, n = 12L)
  )
dat
## # A tibble: 10,584 x 38
## # Groups:   district [49]
##    district year  month falciparum vivax   aet  prcp      q  tmax  tmin pop2015
##    <fct>    <fct> <fct>      <int> <int> <dbl> <dbl>  <dbl> <dbl> <dbl>   <int>
##  1 ALTO NA~ 2000  1              2    42 0.638 0.267 0.184  0.501 0.540    2784
##  2 ALTO TA~ 2000  1              1     0 0.578 0.194 0.111  0.527 0.419    2106
##  3 BALSAPU~ 2000  1              0     9 0.602 0.178 0.0907 0.305 0.285   17436
##  4 BARRANCA 2000  1              9     9 0.643 0.187 0.0965 0.318 0.319   13608
##  5 BELEN    2000  1              3    68 0.581 0.302 0.227  0.516 0.520   75685
##  6 CAHUAPA~ 2000  1              0     2 0.635 0.189 0.0995 0.355 0.389    8331
##  7 CAPELO   2000  1              0     0 0.568 0.240 0.161  0.584 0.513    4454
##  8 CONTAMA~ 2000  1              0     0 0.672 0.178 0.0833 0.597 0.352   27273
##  9 EMILIO ~ 2000  1              0     2 0.596 0.210 0.127  0.594 0.484    7488
## 10 FERNAND~ 2000  1              2    17 0.559 0.292 0.218  0.501 0.504   20225
## # ... with 10,574 more rows, and 27 more variables: province <fct>,
## #   dttm <dttm>, time <int>, aet_lag1 <dbl>, aet_lag3 <dbl>, aet_lag6 <dbl>,
## #   aet_lag12 <dbl>, prcp_lag1 <dbl>, prcp_lag3 <dbl>, prcp_lag6 <dbl>,
## #   prcp_lag12 <dbl>, q_lag1 <dbl>, q_lag3 <dbl>, q_lag6 <dbl>, q_lag12 <dbl>,
## #   soilm_lag1 <dbl>, soilm_lag3 <dbl>, soilm_lag6 <dbl>, soilm_lag12 <dbl>,
## #   tmax_lag1 <dbl>, tmax_lag3 <dbl>, tmax_lag6 <dbl>, tmax_lag12 <dbl>,
## #   tmin_lag1 <dbl>, tmin_lag3 <dbl>, tmin_lag6 <dbl>, tmin_lag12 <dbl>

Since NA values are going to be present for the first 12 months (because of the 12 month lagged variables), we have to do a filter.

dat <- na.omit(dat)

Finally, we create a column containing the values of the districts’ population on the natural logarithmic scale.

dat$ln_pop2015 <- log(dat$pop2015)

Display the final data frame for checking.

dat
## # A tibble: 9,996 x 39
## # Groups:   district [49]
##    district year  month falciparum vivax   aet  prcp      q  tmax  tmin pop2015
##    <fct>    <fct> <fct>      <int> <int> <dbl> <dbl>  <dbl> <dbl> <dbl>   <int>
##  1 ALTO NA~ 2001  1              6     2 0.214 0.414 0.386  0.387 0.411    2784
##  2 ALTO TA~ 2001  1              0     0 0.295 0.217 0.164  0.495 0.382    2106
##  3 BALSAPU~ 2001  1             39    43 0.359 0.153 0.0896 0.285 0.262   17436
##  4 BARRANCA 2001  1              9    21 0.362 0.180 0.118  0.296 0.295   13608
##  5 BELEN    2001  1              1    37 0.163 0.498 0.481  0.408 0.397   75685
##  6 CAHUAPA~ 2001  1             27     2 0.351 0.167 0.105  0.328 0.359    8331
##  7 CAPELO   2001  1              0     0 0.200 0.301 0.265  0.509 0.428    4454
##  8 CONTAMA~ 2001  1              0     0 0.523 0.183 0.104  0.609 0.366   27273
##  9 EMILIO ~ 2001  1              2     3 0.285 0.228 0.177  0.551 0.436    7488
## 10 FERNAND~ 2001  1              1     9 0.162 0.471 0.452  0.404 0.393   20225
## # ... with 9,986 more rows, and 28 more variables: province <fct>, dttm <dttm>,
## #   time <int>, aet_lag1 <dbl>, aet_lag3 <dbl>, aet_lag6 <dbl>,
## #   aet_lag12 <dbl>, prcp_lag1 <dbl>, prcp_lag3 <dbl>, prcp_lag6 <dbl>,
## #   prcp_lag12 <dbl>, q_lag1 <dbl>, q_lag3 <dbl>, q_lag6 <dbl>, q_lag12 <dbl>,
## #   soilm_lag1 <dbl>, soilm_lag3 <dbl>, soilm_lag6 <dbl>, soilm_lag12 <dbl>,
## #   tmax_lag1 <dbl>, tmax_lag3 <dbl>, tmax_lag6 <dbl>, tmax_lag12 <dbl>,
## #   tmin_lag1 <dbl>, tmin_lag3 <dbl>, tmin_lag6 <dbl>, tmin_lag12 <dbl>,
## #   ln_pop2015 <dbl>

Time-varying coefficients model building

# set.seed(20210223)
# 
# indices <- splitTools::partition(
#   y    = dat$district, 
#   type = "stratified", 
#   p    = c(train = 0.88, test = 0.12)
# )
# 
# train <- dat[indices$train, ]
# test  <- dat[indices$test, ]
# get_oos_dev <- function(model, prediction, y, weights = NULL) {
# 
#   if(is.null(weights)) weights <- rep(1, times= length(y))
# 
#   dev_residuals <- model$family$dev.resids(y, prediction, weights)
#   deviance      <- sum(dev_residuals)
#   deviance
# }

TVCM: Model 0

Predictors: aet, prcp, q, tmax, tmin

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_0 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,   bs = "tp", k = 50) +
    s(time, by = prcp,  bs = "tp", k = 30) +
    s(time, by = q,     bs = "tp", k = 30) +
    s(time, by = tmax,  bs = "tp", k = 100) +
    s(time, by = tmin,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 3.77 sec elapsed
cat("\nConverged?", vivax_tvcm_0$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_0)
## 
## Family: Negative Binomial(0.908) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp, 
##     bs = "tp", k = 30) + s(time, by = q, bs = "tp", k = 30) + 
##     s(time, by = tmax, bs = "tp", k = 100) + s(time, by = tmin, 
##     bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.1149     0.3371  -18.14   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F  p-value    
## s(district)  47.827 48.000 280.201  < 2e-16 ***
## s(time):aet  17.389 20.839   6.624  < 2e-16 ***
## s(time):prcp  2.003  2.004   1.171 0.310036    
## s(time):q     5.688  6.817   3.991 0.000315 ***
## s(time):tmax 62.098 74.336  11.439  < 2e-16 ***
## s(time):tmin 19.869 24.080  13.872  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.456   Deviance explained = 70.1%
## fREML =  14811  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_0, type = "deviance", pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -3.367162e-06 -3.355952e-06 -2.271658e-04 -9.735585e-04 -3.874418e-05
## [6] -3.720399e-06
## 
## $hess
##               [,1]          [,2]         [,3]          [,4]          [,5]
## [1,]  2.337996e+01 -3.006481e-02 3.191238e-06  0.0003387835 -3.216310e-01
## [2,] -3.006481e-02  2.351686e+00 8.253129e-07  0.0162119750  4.608871e-01
## [3,]  3.191238e-06  8.253129e-07 2.277589e-04  0.0009991606  3.471305e-05
## [4,]  3.387835e-04  1.621198e-02 9.991606e-04  1.4730925139  3.036194e-02
## [5,] -3.216310e-01  4.608871e-01 3.471305e-05  0.0303619402  1.624212e+01
## [6,] -1.518762e-02  4.888146e-01 2.586573e-06 -0.0086091634  8.189517e-01
##               [,6]
## [1,] -1.518762e-02
## [2,]  4.888146e-01
## [3,]  2.586573e-06
## [4,] -8.609163e-03
## [5,]  8.189517e-01
## [6,]  2.299578e+00
## 
## Model rank =  360 / 360 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.83      NA      NA
## s(time):aet   50.00  17.39    0.87    0.21
## s(time):prcp  30.00   2.00    0.87    0.24
## s(time):q     30.00   5.69    0.87    0.20
## s(time):tmax 100.00  62.10    0.87    0.18
## s(time):tmin 100.00  19.87    0.87    0.14
tibble::tibble(
  x = lubridate::as_datetime(dat$time),
  y = vivax_tvcm_0$residuals
) %>% 
  ggplot(aes(x = x, y = y)) +
  geom_point() +
  scale_x_datetime(date_labels = "%Y", date_breaks = "1 year") +
  labs(x = NULL, y = "Deviance residuals")

mgcv::plot.gam(
  x          = vivax_tvcm_0, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_0)
##          para s(district) s(time):aet s(time):prcp s(time):q s(time):tmax
## worst       1  1.00000000   0.9991857    0.9999320 0.9998684    0.9965358
## observed    1  0.39902337   0.9940252    0.9990555 0.9983211    0.9813848
## estimate    1  0.06455737   0.9933047    0.9991923 0.9986086    0.9787407
##          s(time):tmin
## worst       0.9945440
## observed    0.9765045
## estimate    0.9821762
mgcv::concurvity(vivax_tvcm_0, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):prcp s(time):q
## para         1.0000000  0.02040816   0.3277867    0.2939797 0.2346176
## s(district)  1.0000000  1.00000000   0.3289483    0.3074769 0.2607852
## s(time):aet  0.9668224  0.01992532   1.0000000    0.8019478 0.6666345
## s(time):prcp 0.8070714  0.01738709   0.7471841    1.0000000 0.9638060
## s(time):q    0.6399314  0.01471070   0.5604360    0.9621734 1.0000000
## s(time):tmax 0.9593538  0.02031868   0.9563570    0.8155472 0.7073408
## s(time):tmin 0.9692789  0.02029111   0.9515915    0.8769878 0.7818448
##              s(time):tmax s(time):tmin
## para            0.3253505    0.3294669
## s(district)     0.3346232    0.3352775
## s(time):aet     0.9495592    0.9387231
## s(time):prcp    0.7652513    0.8520376
## s(time):q       0.5965255    0.7094315
## s(time):tmax    1.0000000    0.9514297
## s(time):tmin    0.9488907    1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_0 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,   bs = "tp", k = 50) +
    s(time, by = prcp,  bs = "tp", k = 30) +
    s(time, by = q,     bs = "tp", k = 30) +
    s(time, by = tmax,  bs = "tp", k = 100) +
    s(time, by = tmin,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 3.31 sec elapsed
cat("\nConverged?", falciparum_tvcm_0$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_0)
## 
## Family: Negative Binomial(0.529) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp, 
##     bs = "tp", k = 30) + s(time, by = q, bs = "tp", k = 30) + 
##     s(time, by = tmax, bs = "tp", k = 100) + s(time, by = tmin, 
##     bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.4264     0.4105  -18.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F p-value    
## s(district)  47.546 49.000 171.324  <2e-16 ***
## s(time):aet   7.192  8.605  10.225  <2e-16 ***
## s(time):prcp  3.239  3.658   2.292  0.0620 .  
## s(time):q     5.836  6.952   2.613  0.0114 *  
## s(time):tmax 59.062 70.945   6.070  <2e-16 ***
## s(time):tmin 21.449 26.016   7.307  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.403   Deviance explained = 66.5%
## fREML =  13772  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_0, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -3.518868e-08 -1.443560e-08 -2.223056e-08 -5.257530e-08 -8.094957e-07
## [6] -3.101533e-07
## 
## $hess
##               [,1]        [,2]          [,3]         [,4]        [,5]
## [1,] 22.6358899730 -0.01350932 -0.0009755681 -0.001422401 -0.28478750
## [2,] -0.0135093222  1.89531300  0.0157292065 -0.069450446 -0.06723019
## [3,] -0.0009755681  0.01572921  0.1152797305  0.069492242 -0.00136545
## [4,] -0.0014224011 -0.06945045  0.0694922418  0.985540319 -0.02624397
## [5,] -0.2847875031 -0.06723019 -0.0013654498 -0.026243973 11.84634291
## [6,] -0.0623835530  0.04806034 -0.0059423719 -0.005151761 -0.41295191
##              [,6]
## [1,] -0.062383553
## [2,]  0.048060343
## [3,] -0.005942372
## [4,] -0.005151761
## [5,] -0.412951912
## [6,]  2.769777054
## 
## Model rank =  360 / 360 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.55      NA      NA
## s(time):aet   50.00   7.19    0.85    0.94
## s(time):prcp  30.00   3.24    0.85    0.94
## s(time):q     30.00   5.84    0.85    0.94
## s(time):tmax 100.00  59.06    0.85    0.95
## s(time):tmin 100.00  21.45    0.85    0.95
mgcv::plot.gam(
  x          = falciparum_tvcm_0, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_0)
##          para s(district) s(time):aet s(time):prcp s(time):q s(time):tmax
## worst       1  1.00000000   0.9991857    0.9999320 0.9998684    0.9965358
## observed    1  0.39611215   0.9931296    0.9991574 0.9986375    0.9796553
## estimate    1  0.06455737   0.9933039    0.9991922 0.9986085    0.9787434
##          s(time):tmin
## worst       0.9945440
## observed    0.9772649
## estimate    0.9821782
mgcv::concurvity(falciparum_tvcm_0, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):prcp s(time):q
## para         1.0000000  0.02040816   0.3279674    0.2942235 0.2348166
## s(district)  1.0000000  1.00000000   0.3291297    0.3077350 0.2610108
## s(time):aet  0.9668224  0.01992532   1.0000000    0.8019210 0.6666026
## s(time):prcp 0.8070714  0.01738709   0.7471705    1.0000000 0.9638083
## s(time):q    0.6399314  0.01471070   0.5604213    0.9621755 1.0000000
## s(time):tmax 0.9593538  0.02031868   0.9563535    0.8155208 0.7073092
## s(time):tmin 0.9692789  0.02029111   0.9515887    0.8769723 0.7818254
##              s(time):tmax s(time):tmin
## para            0.3254775    0.3295965
## s(district)     0.3347548    0.3354092
## s(time):aet     0.9495558    0.9387202
## s(time):prcp    0.7652405    0.8520308
## s(time):q       0.5965152    0.7094251
## s(time):tmax    1.0000000    0.9514276
## s(time):tmin    0.9488885    1.0000000

TVCM: Model 1

Predictors: aet, q, tmax, tmin

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_1 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,   bs = "tp", k = 50) +
    s(time, by = q,     bs = "tp", k = 30) +
    s(time, by = tmax,  bs = "tp", k = 100) +
    s(time, by = tmin,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.79 sec elapsed
cat("\nConverged?", vivax_tvcm_1$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_1)
## 
## Family: Negative Binomial(0.908) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = q, bs = "tp", 
##     k = 30) + s(time, by = tmax, bs = "tp", k = 100) + s(time, 
##     by = tmin, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.0633     0.3339  -18.16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F  p-value    
## s(district)  47.827 49.000 274.521  < 2e-16 ***
## s(time):aet  17.432 20.890   8.455  < 2e-16 ***
## s(time):q     5.683  6.811   5.561 3.63e-06 ***
## s(time):tmax 62.138 74.384  11.504  < 2e-16 ***
## s(time):tmin 19.933 24.158  13.831  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.455   Deviance explained = 70.1%
## fREML =  14814  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_1, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -3.774402e-08 -2.406240e-07 -2.981255e-08 -6.683805e-07 -1.951544e-07
## 
## $hess
##               [,1]        [,2]          [,3]        [,4]         [,5]
## [1,] 23.3806467951 -0.03435635  0.0006304787 -0.32306655 -0.015678796
## [2,] -0.0343563451  2.41288040  0.0145859015  0.43709118  0.482079442
## [3,]  0.0006304787  0.01458590  1.4569708243  0.02741787 -0.007551268
## [4,] -0.3230665489  0.43709118  0.0274178667 16.20079550  0.836400811
## [5,] -0.0156787958  0.48207944 -0.0075512676  0.83640081  2.245322803
## 
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value    
## s(district)   49.00  47.83      NA      NA    
## s(time):aet   50.00  17.43    0.84   0.005 ** 
## s(time):q     30.00   5.68    0.84  <2e-16 ***
## s(time):tmax 100.00  62.14    0.84   0.005 ** 
## s(time):tmin 100.00  19.93    0.84  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mgcv::plot.gam(
  x          = vivax_tvcm_1, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_1)
##          para s(district) s(time):aet s(time):q s(time):tmax s(time):tmin
## worst       1  1.00000000   0.9873856 0.9144782    0.9964242    0.9941011
## observed    1  0.39435693   0.9740345 0.8434204    0.9805507    0.9755251
## estimate    1  0.06325033   0.9758498 0.8481380    0.9778193    0.9814017
mgcv::concurvity(vivax_tvcm_1, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):q s(time):tmax
## para         1.0000000  0.02040816   0.3265168 0.2335104    0.3241157
## s(district)  1.0000000  1.00000000   0.3276732 0.2595149    0.3333457
## s(time):aet  0.9668224  0.01992532   1.0000000 0.6668459    0.9495827
## s(time):q    0.6399314  0.01471070   0.5604452 1.0000000    0.5965317
## s(time):tmax 0.9593538  0.02031868   0.9563805 0.7075603    1.0000000
## s(time):tmin 0.9692789  0.02029111   0.9516124 0.7819756    0.9489026
##              s(time):tmin
## para            0.3282028
## s(district)     0.3339925
## s(time):aet     0.9387457
## s(time):q       0.7094209
## s(time):tmax    0.9514415
## s(time):tmin    1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_1 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,   bs = "tp", k = 50) +
    s(time, by = q,     bs = "tp", k = 30) +
    s(time, by = tmax,  bs = "tp", k = 100) +
    s(time, by = tmin,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.89 sec elapsed
cat("\nConverged?", falciparum_tvcm_1$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_1)
## 
## Family: Negative Binomial(0.529) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = q, bs = "tp", 
##     k = 30) + s(time, by = tmax, bs = "tp", k = 100) + s(time, 
##     by = tmin, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.4018     0.4065  -18.21   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F p-value    
## s(district)  47.545 49.000 171.291  <2e-16 ***
## s(time):aet   7.191  8.607  11.491  <2e-16 ***
## s(time):q     6.664  8.016   8.034  <2e-16 ***
## s(time):tmax 59.697 71.621   6.130  <2e-16 ***
## s(time):tmin 21.667 26.279   7.102  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.402   Deviance explained = 66.5%
## fREML =  13778  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_1, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -3.953605e-08 -5.510486e-09 -5.903285e-08 -8.373566e-07 -3.139063e-07
## 
## $hess
##              [,1]        [,2]         [,3]       [,4]        [,5]
## [1,] 22.635798175 -0.01344865 -0.002555688 -0.2905029 -0.06355166
## [2,] -0.013448647  1.98098897 -0.038133451 -0.0622132  0.03828143
## [3,] -0.002555688 -0.03813345  1.375141206 -0.0624381 -0.02176061
## [4,] -0.290502947 -0.06221320 -0.062438105 12.0142663 -0.40130340
## [5,] -0.063551662  0.03828143 -0.021760608 -0.4013034  2.84890650
## 
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.55      NA      NA
## s(time):aet   50.00   7.19    0.84    0.91
## s(time):q     30.00   6.66    0.84    0.93
## s(time):tmax 100.00  59.70    0.84    0.88
## s(time):tmin 100.00  21.67    0.84    0.88
mgcv::plot.gam(
  x          = falciparum_tvcm_1, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_1)
##          para s(district) s(time):aet s(time):q s(time):tmax s(time):tmin
## worst       1  1.00000000   0.9873856 0.9144782    0.9964242    0.9941011
## observed    1  0.39339010   0.9676825 0.8359675    0.9786208    0.9761656
## estimate    1  0.06325033   0.9758953 0.8483709    0.9779531    0.9814996
mgcv::concurvity(falciparum_tvcm_1, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):q s(time):tmax
## para         1.0000000  0.02040816   0.3314763 0.2373774    0.3294197
## s(district)  1.0000000  1.00000000   0.3326532 0.2639145    0.3388293
## s(time):aet  0.9668224  0.01992532   1.0000000 0.6663847    0.9494902
## s(time):q    0.6399314  0.01471070   0.5605638 1.0000000    0.5965780
## s(time):tmax 0.9593538  0.02031868   0.9563137 0.7071295    1.0000000
## s(time):tmin 0.9692789  0.02029111   0.9515541 0.7817192    0.9488734
##              s(time):tmin
## para            0.3336369
## s(district)     0.3395147
## s(time):aet     0.9386541
## s(time):q       0.7095184
## s(time):tmax    0.9514141
## s(time):tmin    1.0000000

TVCM: Model 2

Predictors: aet, prcp, tmax, tmin

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_2 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,   bs = "tp", k = 50) +
    s(time, by = prcp,  bs = "tp", k = 30) +
    s(time, by = tmax,  bs = "tp", k = 100) +
    s(time, by = tmin,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.78 sec elapsed
cat("\nConverged?", vivax_tvcm_2$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_2)
## 
## Family: Negative Binomial(0.908) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp, 
##     bs = "tp", k = 30) + s(time, by = tmax, bs = "tp", k = 100) + 
##     s(time, by = tmin, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.0490     0.3344  -18.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df       F  p-value    
## s(district)  47.83  49.00 274.419  < 2e-16 ***
## s(time):aet  17.52  20.99   8.394  < 2e-16 ***
## s(time):prcp  5.56   6.66   5.299 9.56e-06 ***
## s(time):tmax 62.16  74.41  11.521  < 2e-16 ***
## s(time):tmin 19.89  24.10  13.747  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.455   Deviance explained = 70.1%
## fREML =  14814  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_2, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -4.705703e-08 -2.966713e-07 -3.397138e-08 -8.221233e-07 -2.393096e-07
## 
## $hess
##              [,1]        [,2]         [,3]        [,4]         [,5]
## [1,] 23.380150632 -0.03461292  0.001477193 -0.32273648 -0.015300875
## [2,] -0.034612920  2.44629002  0.011640769  0.43097790  0.479687452
## [3,]  0.001477193  0.01164077  1.434720546  0.02860737 -0.008259271
## [4,] -0.322736479  0.43097790  0.028607374 16.20117095  0.840785332
## [5,] -0.015300875  0.47968745 -0.008259271  0.84078533  2.208557963
## 
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.83      NA      NA
## s(time):aet   50.00  17.52    0.89    0.66
## s(time):prcp  30.00   5.56    0.89    0.64
## s(time):tmax 100.00  62.16    0.89    0.66
## s(time):tmin 100.00  19.89    0.89    0.74
mgcv::plot.gam(
  x          = vivax_tvcm_2, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_2)
##          para s(district) s(time):aet s(time):prcp s(time):tmax s(time):tmin
## worst       1  1.00000000   0.9869412    0.9622823    0.9964367    0.9941330
## observed    1  0.39371642   0.9746461    0.9123273    0.9806178    0.9755543
## estimate    1  0.06319163   0.9755038    0.9124919    0.9778706    0.9814604
mgcv::concurvity(vivax_tvcm_2, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):prcp s(time):tmax
## para         1.0000000  0.02040816   0.3263912    0.2923165    0.3242324
## s(district)  1.0000000  1.00000000   0.3275472    0.3057018    0.3334647
## s(time):aet  0.9668224  0.01992532   1.0000000    0.8022260    0.9495872
## s(time):prcp 0.8070714  0.01738709   0.7472882    1.0000000    0.7653253
## s(time):tmax 0.9593538  0.02031868   0.9563895    0.8158516    1.0000000
## s(time):tmin 0.9692789  0.02029111   0.9516214    0.8771643    0.9489120
##              s(time):tmin
## para            0.3283258
## s(district)     0.3341169
## s(time):aet     0.9387486
## s(time):prcp    0.8520830
## s(time):tmax    0.9514512
## s(time):tmin    1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_2 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,   bs = "tp", k = 50) +
    s(time, by = prcp,  bs = "tp", k = 30) +
    s(time, by = tmax,  bs = "tp", k = 100) +
    s(time, by = tmin,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.58 sec elapsed
cat("\nConverged?", falciparum_tvcm_2$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_2)
## 
## Family: Negative Binomial(0.529) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp, 
##     bs = "tp", k = 30) + s(time, by = tmax, bs = "tp", k = 100) + 
##     s(time, by = tmin, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.3721     0.4074  -18.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F p-value    
## s(district)  47.545 48.000 174.727  <2e-16 ***
## s(time):aet   6.995  8.366  10.820  <2e-16 ***
## s(time):prcp  6.564  7.895   7.666  <2e-16 ***
## s(time):tmax 59.956 71.885   6.137  <2e-16 ***
## s(time):tmin 21.784 26.419   7.077  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.404   Deviance explained = 66.5%
## fREML =  13778  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_2, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -2.180887e-08 -2.163537e-09 -3.442661e-08 -4.918663e-07 -1.809657e-07
## 
## $hess
##              [,1]        [,2]         [,3]        [,4]        [,5]
## [1,] 22.633317911 -0.01330178 -0.003626106 -0.28520792 -0.06332660
## [2,] -0.013301776  1.92861198  0.013414445 -0.07817084  0.03587574
## [3,] -0.003626106  0.01341444  1.274784052 -0.10945297 -0.03129743
## [4,] -0.285207915 -0.07817084 -0.109452972 11.99518120 -0.38841947
## [5,] -0.063326595  0.03587574 -0.031297435 -0.38841947  2.84976050
## 
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.54      NA      NA
## s(time):aet   50.00   7.00    0.82    0.22
## s(time):prcp  30.00   6.56    0.82    0.20
## s(time):tmax 100.00  59.96    0.82    0.22
## s(time):tmin 100.00  21.78    0.82    0.20
mgcv::plot.gam(
  x          = falciparum_tvcm_2, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_2)
##          para s(district) s(time):aet s(time):prcp s(time):tmax s(time):tmin
## worst       1  1.00000000   0.9869412    0.9622823    0.9964367    0.9941330
## observed    1  0.39272003   0.9698421    0.9061720    0.9786591    0.9762106
## estimate    1  0.06319163   0.9755316    0.9125252    0.9779537    0.9815205
mgcv::concurvity(falciparum_tvcm_2, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):prcp s(time):tmax
## para         1.0000000  0.02040816   0.3297986    0.2957433    0.3276114
## s(district)  1.0000000  1.00000000   0.3309685    0.3093380    0.3369594
## s(time):aet  0.9668224  0.01992532   1.0000000    0.8018830    0.9495236
## s(time):prcp 0.8070714  0.01738709   0.7472437    1.0000000    0.7652447
## s(time):tmax 0.9593538  0.02031868   0.9563369    0.8155077    1.0000000
## s(time):tmin 0.9692789  0.02029111   0.9515749    0.8769689    0.9488847
##              s(time):tmin
## para            0.3317847
## s(district)     0.3376325
## s(time):aet     0.9386872
## s(time):prcp    0.8520404
## s(time):tmax    0.9514248
## s(time):tmin    1.0000000

TVCM: Model 3

P. vivax

Predictors: aet, q, tmin

tictoc::tic("GAM model fitting")
vivax_tvcm_3 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,  bs = "tp", k = 50) +
    s(time, by = q,    bs = "tp", k = 30) +
    s(time, by = tmin, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.02 sec elapsed
cat("\nConverged?", vivax_tvcm_3$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_3)
## 
## Family: Negative Binomial(0.84) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = q, bs = "tp", 
##     k = 30) + s(time, by = tmin, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.6287     0.3369  -19.68   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F  p-value    
## s(district)  47.844 48.000 307.252  < 2e-16 ***
## s(time):aet   5.612  6.652   4.655 5.75e-05 ***
## s(time):q     5.807  6.972   4.861 1.93e-05 ***
## s(time):tmin 67.408 80.119   9.212  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.495   Deviance explained = 67.8%
## fREML =  14797  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_3, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 2.866972e-09 2.097608e-08 1.641944e-08 1.491855e-07
## 
## $hess
##              [,1]         [,2]         [,3]        [,4]
## [1,] 23.514289047 -0.003531395 -0.002593209 -0.08947966
## [2,] -0.003531395  1.606928284 -0.010680700 -0.15476745
## [3,] -0.002593209 -0.010680700  1.023805126  0.04366450
## [4,] -0.089479657 -0.154767453  0.043664498 22.30828296
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.84      NA      NA
## s(time):aet   50.00   5.61    0.84    0.35
## s(time):q     30.00   5.81    0.84    0.36
## s(time):tmin 100.00  67.41    0.84    0.32
mgcv::plot.gam(
  x          = vivax_tvcm_3, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_3)
##          para s(district) s(time):aet s(time):q s(time):tmin
## worst       1  1.00000000   0.9817423 0.9070719    0.9913637
## observed    1  0.14292296   0.9584068 0.8097720    0.9400999
## estimate    1  0.04435857   0.9644319 0.8284583    0.9686361
mgcv::concurvity(vivax_tvcm_3, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):q s(time):tmin
## para         1.0000000  0.02040816   0.3311993 0.2369456    0.3336484
## s(district)  1.0000000  1.00000000   0.3323752 0.2634173    0.3395257
## s(time):aet  0.9668224  0.01992532   1.0000000 0.6665501    0.9386557
## s(time):q    0.6399314  0.01471070   0.5606461 1.0000000    0.7095498
## s(time):tmin 0.9692789  0.02029111   0.9515653 0.7818332    1.0000000

P. falciparum

Predictors: aet, q, tmin

tictoc::tic("GAM model fitting")
falciparum_tvcm_3 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,  bs = "tp", k = 50) +
    s(time, by = q,    bs = "tp", k = 30) +
    s(time, by = tmin, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.64 sec elapsed
cat("\nConverged?", falciparum_tvcm_3$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_3)
## 
## Family: Negative Binomial(0.502) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = q, bs = "tp", 
##     k = 30) + s(time, by = tmin, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.9864     0.4011  -19.91   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F p-value    
## s(district)  47.581 48.000 188.647  <2e-16 ***
## s(time):aet   5.683  6.710  10.252  <2e-16 ***
## s(time):q     7.292  8.784   7.218  <2e-16 ***
## s(time):tmin 57.363 69.563   5.843  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.367   Deviance explained = 64.8%
## fREML =  13753  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_3, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 1.978898e-07 1.151730e-07 1.387608e-07 1.287021e-06
## 
## $hess
##              [,1]         [,2]         [,3]        [,4]
## [1,] 22.867144989 -0.007227575 -0.001749774 -0.08861918
## [2,] -0.007227575  1.111698746 -0.028527310 -0.06439048
## [3,] -0.001749774 -0.028527310  1.590586583 -0.17400596
## [4,] -0.088619177 -0.064390480 -0.174005959 13.33578457
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value  
## s(district)   49.00  47.58      NA      NA  
## s(time):aet   50.00   5.68     0.8    0.15  
## s(time):q     30.00   7.29     0.8    0.12  
## s(time):tmin 100.00  57.36     0.8    0.07 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mgcv::plot.gam(
  x          = falciparum_tvcm_3, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_3)
##          para s(district) s(time):aet s(time):q s(time):tmin
## worst       1  1.00000000   0.9817423 0.9070719    0.9913637
## observed    1  0.10501527   0.9633845 0.8201042    0.9393871
## estimate    1  0.04435857   0.9643944 0.8283200    0.9685848
mgcv::concurvity(falciparum_tvcm_3, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):q s(time):tmin
## para         1.0000000  0.02040816   0.3296516 0.2358854    0.3316726
## s(district)  1.0000000  1.00000000   0.3308209 0.2622116    0.3375185
## s(time):aet  0.9668224  0.01992532   1.0000000 0.6665940    0.9386850
## s(time):q    0.6399314  0.01471070   0.5605220 1.0000000    0.7094785
## s(time):tmin 0.9692789  0.02029111   0.9515773 0.7818353    1.0000000

TVCM: Model 4

Predictors: aet, q, tmax

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_4 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,  bs = "tp", k = 50) +
    s(time, by = q,    bs = "tp", k = 30) +
    s(time, by = tmax, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.37 sec elapsed
cat("\nConverged?", vivax_tvcm_4$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_4)
## 
## Family: Negative Binomial(0.871) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = q, bs = "tp", 
##     k = 30) + s(time, by = tmax, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.1403     0.3279  -18.73   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F p-value    
## s(district)  47.830 48.000 275.840  <2e-16 ***
## s(time):aet  20.959 24.919  14.252  <2e-16 ***
## s(time):q     7.488  9.004  11.022  <2e-16 ***
## s(time):tmax 61.746 74.425   8.566  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.491   Deviance explained = 68.9%
## fREML =  14797  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_4, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 1.079556e-08 1.345414e-07 3.612030e-08 3.334112e-07
## 
## $hess
##             [,1]        [,2]        [,3]       [,4]
## [1,] 23.42193672 -0.02028744 -0.01269582 -0.2146663
## [2,] -0.02028744  3.17821799  0.08078295  0.7534201
## [3,] -0.01269582  0.08078295  1.40303917 -0.1287341
## [4,] -0.21466632  0.75342007 -0.12873408 15.7944463
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.83      NA      NA
## s(time):aet   50.00  20.96    0.86    0.40
## s(time):q     30.00   7.49    0.86    0.42
## s(time):tmax 100.00  61.75    0.86    0.47
mgcv::plot.gam(
  x          = vivax_tvcm_4, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_4)
##          para s(district) s(time):aet s(time):q s(time):tmax
## worst       1  1.00000000   0.9842730 0.8891322    0.9943222
## observed    1  0.29327672   0.9598504 0.7724446    0.9759755
## estimate    1  0.04670974   0.9672774 0.7867258    0.9663222
mgcv::concurvity(vivax_tvcm_4, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):q s(time):tmax
## para         1.0000000  0.02040816   0.3307154 0.2365662    0.3288979
## s(district)  1.0000000  1.00000000   0.3318894 0.2629895    0.3382887
## s(time):aet  0.9668224  0.01992532   1.0000000 0.6665718    0.9495005
## s(time):q    0.6399314  0.01471070   0.5606215 1.0000000    0.5966078
## s(time):tmax 0.9593538  0.02031868   0.9563304 0.7073331    1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_4 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,  bs = "tp", k = 50) +
    s(time, by = q,    bs = "tp", k = 30) +
    s(time, by = tmax, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.25 sec elapsed
cat("\nConverged?", falciparum_tvcm_4$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_4)
## 
## Family: Negative Binomial(0.511) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = q, bs = "tp", 
##     k = 30) + s(time, by = tmax, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.4326     0.3913     -19   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F p-value    
## s(district)  47.546  49.00 169.362  <2e-16 ***
## s(time):aet   8.782  10.58  13.646  <2e-16 ***
## s(time):q     8.412  10.13  12.090  <2e-16 ***
## s(time):tmax 56.118  68.28   6.309  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.335   Deviance explained = 65.4%
## fREML =  13741  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_4, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 4.826851e-08 8.494352e-08 8.504216e-08 7.977093e-07
## 
## $hess
##              [,1]        [,2]         [,3]         [,4]
## [1,] 22.692965527  0.01110830 -0.005933296 -0.194848153
## [2,]  0.011108304  1.34112132  0.082482853 -0.113475094
## [3,] -0.005933296  0.08248285  1.760673570 -0.001891677
## [4,] -0.194848153 -0.11347509 -0.001891677 11.313262733
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.55      NA      NA
## s(time):aet   50.00   8.78    0.82    0.30
## s(time):q     30.00   8.41    0.82    0.22
## s(time):tmax 100.00  56.12    0.82    0.30
mgcv::plot.gam(
  x          = falciparum_tvcm_4, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_4)
##          para s(district) s(time):aet s(time):q s(time):tmax
## worst       1  1.00000000   0.9842730 0.8891322    0.9943222
## observed    1  0.30712502   0.9558048 0.7845443    0.9751917
## estimate    1  0.04670974   0.9672780 0.7866885    0.9663055
mgcv::concurvity(falciparum_tvcm_4, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):q s(time):tmax
## para         1.0000000  0.02040816   0.3306742 0.2367472    0.3284921
## s(district)  1.0000000  1.00000000   0.3318475 0.2631846    0.3378704
## s(time):aet  0.9668224  0.01992532   1.0000000 0.6665000    0.9495031
## s(time):q    0.6399314  0.01471070   0.5605250 1.0000000    0.5965507
## s(time):tmax 0.9593538  0.02031868   0.9563268 0.7072343    1.0000000

TVCM: Model 5

Predictors: aet, prcp, tmin

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_5_poiss <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,  bs = "tp", k = 50) +
    s(time, by = prcp, bs = "tp", k = 30) +
    s(time, by = tmin, bs = "tp", k = 100),
  family   = poisson(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.62 sec elapsed
cat("\nConverged?", vivax_tvcm_5_poiss$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5_poiss)
## 
## Family: poisson 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp, 
##     bs = "tp", k = 30) + s(time, by = tmin, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -7.1645     0.3114  -23.01   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df Chi.sq p-value    
## s(district)  47.91  49.00 484098  <2e-16 ***
## s(time):aet  49.34  49.96  11120  <2e-16 ***
## s(time):prcp 29.71  29.99   5362  <2e-16 ***
## s(time):tmin 99.10  99.96  16965  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.597   Deviance explained = 74.6%
## fREML = 1.8194e+05  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_5_poiss, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1]  1.890420e-05  1.178532e-07  3.025197e-07 -2.230119e-08
## 
## $hess
##              [,1]         [,2]         [,3]         [,4]
## [1,] 23.591754270 -0.001002867 -0.001194892 -0.002640216
## [2,] -0.001002867 22.948752544 -0.076623413 -0.565119285
## [3,] -0.001194892 -0.076623413 13.053785783 -0.196850760
## [4,] -0.002640216 -0.565119285 -0.196850760 46.999138832
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                 k'   edf k-index p-value
## s(district)   49.0  47.9      NA      NA
## s(time):aet   50.0  49.3    0.99    0.66
## s(time):prcp  30.0  29.7    0.99    0.68
## s(time):tmin 100.0  99.1    0.99    0.69
sim_res_vivax_tvcm_5_poiss <- simulateResiduals(
  fittedModel = vivax_tvcm_5_poiss,
  integerResponse = TRUE
)
plot(sim_res_vivax_tvcm_5_poiss)
## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details

testDispersion(sim_res_vivax_tvcm_5_poiss)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 117.02, p-value < 2.2e-16
## alternative hypothesis: two.sided
tictoc::tic("GAM model fitting")
vivax_tvcm_5 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,  bs = "tp", k = 50) +
    s(time, by = prcp, bs = "tp", k = 30) +
    s(time, by = tmin, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.06 sec elapsed
cat("\nConverged?", vivax_tvcm_5$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5)
## 
## Family: Negative Binomial(0.84) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp, 
##     bs = "tp", k = 30) + s(time, by = tmin, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.6239     0.3372  -19.65   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F  p-value    
## s(district)  47.844 49.000 300.903  < 2e-16 ***
## s(time):aet   5.416  6.409   3.910 0.000509 ***
## s(time):prcp  5.516  6.620   4.696 6.76e-05 ***
## s(time):tmin 67.415 80.126   9.273  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.495   Deviance explained = 67.8%
## fREML =  14796  Scale est. = 1         n = 9996
sim_res_vivax_tvcm_5 <- simulateResiduals(
  fittedModel = vivax_tvcm_5,
  integerResponse = TRUE,
  n = 1000
)
plot(sim_res_vivax_tvcm_5)
## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details

testDispersion(sim_res_vivax_tvcm_5)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.5403, p-value < 2.2e-16
## alternative hypothesis: two.sided
mgcv::summary.gam(vivax_tvcm_5)$s.table
##                    edf    Ref.df          F      p-value
## s(district)  47.844266 49.000000 300.902594 0.000000e+00
## s(time):aet   5.416405  6.408889   3.909630 5.088126e-04
## s(time):prcp  5.515848  6.619505   4.695992 6.762316e-05
## s(time):tmin 67.414970 80.126333   9.272846 0.000000e+00
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_5, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 4.727969e-09 2.539192e-08 1.453370e-08 1.072172e-07
## 
## $hess
##             [,1]         [,2]         [,3]        [,4]
## [1,] 23.51411546 -0.002954960 -0.003977930 -0.08878888
## [2,] -0.00295496  1.483205961  0.003150312 -0.18796755
## [3,] -0.00397793  0.003150312  1.024386783  0.09059968
## [4,] -0.08878888 -0.187967553  0.090599677 22.36730526
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.84      NA      NA
## s(time):aet   50.00   5.42    0.87    0.48
## s(time):prcp  30.00   5.52    0.87    0.46
## s(time):tmin 100.00  67.41    0.87    0.44
mgcv::plot.gam(
  x          = vivax_tvcm_5, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_5)
##          para s(district) s(time):aet s(time):prcp s(time):tmin
## worst       1  1.00000000   0.9809642    0.9594399    0.9914194
## observed    1  0.14249995   0.9593331    0.8891144    0.9411815
## estimate    1  0.04423609   0.9626495    0.9002785    0.9691286
mgcv::concurvity(vivax_tvcm_5, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):prcp s(time):tmin
## para         1.0000000  0.02040816   0.3303029    0.2956675    0.3329571
## s(district)  1.0000000  1.00000000   0.3314755    0.3092617    0.3388226
## s(time):aet  0.9668224  0.01992532   1.0000000    0.8019846    0.9386692
## s(time):prcp 0.8070714  0.01738709   0.7473701    1.0000000    0.8520751
## s(time):tmin 0.9692789  0.02029111   0.9515784    0.8770551    1.0000000
tictoc::tic("GAM model fitting")
vivax_tvcm_5 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,  bs = "tp", k = 50) +
    s(time, by = prcp, bs = "tp", k = 30) +
    s(time, by = tmin, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.02 sec elapsed
cat("\nConverged?", vivax_tvcm_5$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5)
## 
## Family: Negative Binomial(0.84) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp, 
##     bs = "tp", k = 30) + s(time, by = tmin, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.6239     0.3372  -19.65   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F  p-value    
## s(district)  47.844 49.000 300.903  < 2e-16 ***
## s(time):aet   5.416  6.409   3.910 0.000509 ***
## s(time):prcp  5.516  6.620   4.696 6.76e-05 ***
## s(time):tmin 67.415 80.126   9.273  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.495   Deviance explained = 67.8%
## fREML =  14796  Scale est. = 1         n = 9996

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_5 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,  bs = "tp", k = 50) +
    s(time, by = prcp, bs = "tp", k = 30) +
    s(time, by = tmin, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.04 sec elapsed
cat("\nConverged?", falciparum_tvcm_5$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_5)
## 
## Family: Negative Binomial(0.502) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp, 
##     bs = "tp", k = 30) + s(time, by = tmin, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.9621     0.4015  -19.83   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F p-value    
## s(district)  47.581 49.000 184.640  <2e-16 ***
## s(time):aet   5.284  6.218  10.061  <2e-16 ***
## s(time):prcp  7.065  8.515   6.817  <2e-16 ***
## s(time):tmin 57.415 69.616   5.800  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.37   Deviance explained = 64.8%
## fREML =  13751  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_5, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 3.410725e-08 2.814923e-08 4.201096e-08 3.672202e-07
## 
## $hess
##              [,1]         [,2]         [,3]        [,4]
## [1,] 22.866506243 -0.008333529 -0.002181398 -0.08758758
## [2,] -0.008333529  1.037326967  0.022318126 -0.12070199
## [3,] -0.002181398  0.022318126  1.485439464 -0.17431039
## [4,] -0.087587583 -0.120701995 -0.174310387 13.33736634
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.58      NA      NA
## s(time):aet   50.00   5.28    0.83    0.54
## s(time):prcp  30.00   7.06    0.83    0.53
## s(time):tmin 100.00  57.41    0.83    0.53
mgcv::plot.gam(
  x          = falciparum_tvcm_5, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_5)
##          para s(district) s(time):aet s(time):prcp s(time):tmin
## worst       1  1.00000000   0.9809642    0.9594399    0.9914194
## observed    1  0.10468955   0.9629736    0.8992429    0.9407605
## estimate    1  0.04423609   0.9626799    0.9002293    0.9691427
mgcv::concurvity(falciparum_tvcm_5, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):prcp s(time):tmin
## para         1.0000000  0.02040816   0.3318222    0.2976411    0.3338610
## s(district)  1.0000000  1.00000000   0.3330002    0.3113452    0.3397427
## s(time):aet  0.9668224  0.01992532   1.0000000    0.8017250    0.9386454
## s(time):prcp 0.8070714  0.01738709   0.7472135    1.0000000    0.8520028
## s(time):tmin 0.9692789  0.02029111   0.9515499    0.8768750    1.0000000

TVCM: Model 6

Predictors: aet, prcp,tmax

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_6 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,  bs = "tp", k = 50) +
    s(time, by = prcp, bs = "tp", k = 30) +
    s(time, by = tmax, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.15 sec elapsed
cat("\nConverged?", vivax_tvcm_6$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_6)
## 
## Family: Negative Binomial(0.871) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp, 
##     bs = "tp", k = 30) + s(time, by = tmax, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.1283     0.3284  -18.66   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F p-value    
## s(district)  47.829 48.000 275.840  <2e-16 ***
## s(time):aet  20.876 24.824  12.555  <2e-16 ***
## s(time):prcp  7.422  8.921  11.181  <2e-16 ***
## s(time):tmax 61.736 74.415   8.656  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.49   Deviance explained = 68.9%
## fREML =  14796  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_6, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 5.493435e-08 4.307864e-07 1.125115e-07 1.064589e-06
## 
## $hess
##             [,1]        [,2]        [,3]       [,4]
## [1,] 23.42161501 -0.02076764 -0.01220772 -0.2133830
## [2,] -0.02076764  3.16185881  0.10053404  0.7462853
## [3,] -0.01220772  0.10053404  1.40780965 -0.1349497
## [4,] -0.21338302  0.74628531 -0.13494969 16.0251695
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.83      NA      NA
## s(time):aet   50.00  20.88    0.87    0.78
## s(time):prcp  30.00   7.42    0.87    0.79
## s(time):tmax 100.00  61.74    0.87    0.84
mgcv::plot.gam(
  x          = vivax_tvcm_6, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_6)
##          para s(district) s(time):aet s(time):prcp s(time):tmax
## worst       1  1.00000000   0.9837593    0.9379012    0.9943577
## observed    1  0.29169948   0.9644273    0.8742674    0.9761808
## estimate    1  0.04660462   0.9691804    0.8762339    0.9666034
mgcv::concurvity(vivax_tvcm_6, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):prcp s(time):tmax
## para         1.0000000  0.02040816   0.3314476    0.2967447    0.3299469
## s(district)  1.0000000  1.00000000   0.3326250    0.3104059    0.3393724
## s(time):aet  0.9668224  0.01992532   1.0000000    0.8018809    0.9494858
## s(time):prcp 0.8070714  0.01738709   0.7473645    1.0000000    0.7652660
## s(time):tmax 0.9593538  0.02031868   0.9563235    0.8155483    1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_6 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = aet,  bs = "tp", k = 50) +
    s(time, by = prcp, bs = "tp", k = 30) +
    s(time, by = tmax, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.55 sec elapsed
cat("\nConverged?", falciparum_tvcm_6$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_6)
## 
## Family: Negative Binomial(0.51) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp, 
##     bs = "tp", k = 30) + s(time, by = tmax, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.4132     0.3923   -18.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F p-value    
## s(district)  47.545 49.000 169.268  <2e-16 ***
## s(time):aet   8.169  9.826  11.746  <2e-16 ***
## s(time):prcp  8.717 10.508  11.394  <2e-16 ***
## s(time):tmax 56.527 68.730   6.357  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.336   Deviance explained = 65.4%
## fREML =  13741  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_6, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -3.079698e-09 -5.460274e-09 -7.074117e-09 -5.653783e-08
## 
## $hess
##              [,1]         [,2]         [,3]        [,4]
## [1,] 22.691409665  0.007411369 -0.003187324 -0.18669615
## [2,]  0.007411369  1.255975890  0.180441245 -0.11942284
## [3,] -0.003187324  0.180441245  1.649816644 -0.08929099
## [4,] -0.186696151 -0.119422841 -0.089290993 11.38000552
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.55      NA      NA
## s(time):aet   50.00   8.17    0.81    0.20
## s(time):prcp  30.00   8.72    0.81    0.22
## s(time):tmax 100.00  56.53    0.81    0.20
mgcv::plot.gam(
  x          = falciparum_tvcm_6, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_6)
##          para s(district) s(time):aet s(time):prcp s(time):tmax
## worst       1  1.00000000   0.9837593    0.9379012    0.9943577
## observed    1  0.30596140   0.9614097    0.8819831    0.9754096
## estimate    1  0.04660462   0.9691827    0.8761934    0.9665812
mgcv::concurvity(falciparum_tvcm_6, full = FALSE)$estimate
##                   para s(district) s(time):aet s(time):prcp s(time):tmax
## para         1.0000000  0.02040816   0.3314522    0.2971256    0.3294596
## s(district)  1.0000000  1.00000000   0.3326289    0.3108008    0.3388704
## s(time):aet  0.9668224  0.01992532   1.0000000    0.8018003    0.9494860
## s(time):prcp 0.8070714  0.01738709   0.7472604    1.0000000    0.7652080
## s(time):tmax 0.9593538  0.02031868   0.9563178    0.8154353    1.0000000

TVCM: Model 7

Predictors: q, tmin

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_7 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = q,    bs = "tp", k = 30) +
    s(time, by = tmin, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.8 sec elapsed
cat("\nConverged?", vivax_tvcm_7$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_7)
## 
## Family: Negative Binomial(0.837) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = q, bs = "tp", k = 30) + s(time, by = tmin, bs = "tp", 
##     k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.7473     0.3282  -20.56   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F  p-value    
## s(district)  47.845 49.000 301.485  < 2e-16 ***
## s(time):q     5.621  6.744   4.504 8.22e-05 ***
## s(time):tmin 69.242 81.867  17.853  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.491   Deviance explained = 67.7%
## fREML =  14790  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_7, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 1.735192e-09 9.644842e-09 1.175017e-07
## 
## $hess
##              [,1]         [,2]         [,3]
## [1,] 23.514687561 -0.002282676 -0.084396380
## [2,] -0.002282676  0.794182266  0.005866442
## [3,] -0.084396380  0.005866442 23.869537772
## 
## Model rank =  180 / 180 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.84      NA      NA
## s(time):q     30.00   5.62    0.87    0.32
## s(time):tmin 100.00  69.24    0.87    0.34
mgcv::plot.gam(
  x          = vivax_tvcm_7, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_7)
##          para s(district) s(time):q s(time):tmin
## worst       1  1.00000000 0.8907614    0.9887135
## observed    1  0.13089053 0.7936389    0.5441603
## estimate    1  0.03893871 0.8068839    0.7975255
mgcv::concurvity(vivax_tvcm_7, full = FALSE)$estimate
##                   para s(district) s(time):q s(time):tmin
## para         1.0000000  0.02040816 0.2328648    0.3273184
## s(district)  1.0000000  1.00000000 0.2588091    0.3330937
## s(time):q    0.6399314  0.01471070 1.0000000    0.7094052
## s(time):tmin 0.9692789  0.02029111 0.7819597    1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_7 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = q,    bs = "tp", k = 30) +
    s(time, by = tmin, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.77 sec elapsed
cat("\nConverged?", falciparum_tvcm_7$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_7)
## 
## Family: Negative Binomial(0.497) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = q, bs = "tp", k = 30) + s(time, by = tmin, bs = "tp", 
##     k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.2864     0.3876  -21.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F p-value    
## s(district)  47.581 48.000 187.383  <2e-16 ***
## s(time):q     7.353  8.859   7.328  <2e-16 ***
## s(time):tmin 58.272 70.475  11.305  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.362   Deviance explained = 64.5%
## fREML =  13748  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_7, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 3.487436e-07 1.036569e-07 8.872672e-07
## 
## $hess
##               [,1]          [,2]        [,3]
## [1,] 22.8647400856 -0.0009929983 -0.08624672
## [2,] -0.0009929983  1.4679490158 -0.42285662
## [3,] -0.0862467227 -0.4228566198 14.85758815
## 
## Model rank =  180 / 180 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.58      NA      NA
## s(time):q     30.00   7.35     0.8    0.31
## s(time):tmin 100.00  58.27     0.8    0.32
mgcv::plot.gam(
  x          = falciparum_tvcm_7, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_7)
##          para s(district) s(time):q s(time):tmin
## worst       1  1.00000000 0.8907614    0.9887135
## observed    1  0.09446697 0.7922898    0.5988347
## estimate    1  0.03893871 0.8069318    0.7975795
mgcv::concurvity(falciparum_tvcm_7, full = FALSE)$estimate
##                   para s(district) s(time):q s(time):tmin
## para         1.0000000  0.02040816 0.2327271    0.3274677
## s(district)  1.0000000  1.00000000 0.2586449    0.3332449
## s(time):q    0.6399314  0.01471070 1.0000000    0.7094382
## s(time):tmin 0.9692789  0.02029111 0.7820277    1.0000000

TVCM: Model 8

Predictors: q, tmax

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_8 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = q,    bs = "tp", k = 30) +
    s(time, by = tmax, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.73 sec elapsed
cat("\nConverged?", vivax_tvcm_8$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_8)
## 
## Family: Negative Binomial(0.835) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = q, bs = "tp", k = 30) + s(time, by = tmax, bs = "tp", 
##     k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.3144     0.3291  -19.19   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df      F p-value    
## s(district)  47.833  48.00 268.49  <2e-16 ***
## s(time):q     9.554  11.53  10.24  <2e-16 ***
## s(time):tmax 66.884  79.60  15.70  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.486   Deviance explained = 67.7%
## fREML =  14787  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_8, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 7.924974e-08 1.441898e-07 1.283543e-06
## 
## $hess
##             [,1]        [,2]       [,3]
## [1,] 23.42977248 -0.01448436 -0.3194353
## [2,] -0.01448436  1.82053726 -0.2243699
## [3,] -0.31943527 -0.22436988 21.0173453
## 
## Model rank =  180 / 180 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.83      NA      NA
## s(time):q     30.00   9.55    0.86    0.14
## s(time):tmax 100.00  66.88    0.86    0.15
mgcv::plot.gam(
  x          = vivax_tvcm_8, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_8)
##          para s(district) s(time):q s(time):tmax
## worst       1  1.00000000 0.8760397    0.9914966
## observed    1  0.30259653 0.7419765    0.7641925
## estimate    1  0.04152774 0.7615721    0.7242683
mgcv::concurvity(vivax_tvcm_8, full = FALSE)$estimate
##                   para s(district) s(time):q s(time):tmax
## para         1.0000000  0.02040816 0.2337196    0.3242180
## s(district)  1.0000000  1.00000000 0.2597761    0.3334529
## s(time):q    0.6399314  0.01471070 1.0000000    0.5965156
## s(time):tmax 0.9593538  0.02031868 0.7074298    1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_8 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = q,    bs = "tp", k = 30) +
    s(time, by = tmax, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.1 sec elapsed
cat("\nConverged?", falciparum_tvcm_8$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_8)
## 
## Family: Negative Binomial(0.498) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = q, bs = "tp", k = 30) + s(time, by = tmax, bs = "tp", 
##     k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.3781     0.3854  -19.14   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df      F p-value    
## s(district)  47.544  48.00 170.31  <2e-16 ***
## s(time):q     8.954  10.79  11.10  <2e-16 ***
## s(time):tmax 54.214  66.05  12.09  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.36   Deviance explained = 64.6%
## fREML =  13735  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_8, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 3.402689e-08 2.992471e-08 2.725303e-07
## 
## $hess
##               [,1]          [,2]       [,3]
## [1,] 22.7011775413 -0.0002516576 -0.2702655
## [2,] -0.0002516576  1.7900699736 -0.1938459
## [3,] -0.2702654828 -0.1938458871 10.9574371
## 
## Model rank =  180 / 180 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.54      NA      NA
## s(time):q     30.00   8.95    0.85    0.96
## s(time):tmax 100.00  54.21    0.85    0.95
mgcv::plot.gam(
  x          = falciparum_tvcm_8, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_8)
##          para s(district) s(time):q s(time):tmax
## worst       1  1.00000000 0.8760397    0.9914966
## observed    1  0.30375752 0.7739353    0.8719470
## estimate    1  0.04152774 0.7613646    0.7234999
mgcv::concurvity(falciparum_tvcm_8, full = FALSE)$estimate
##                   para s(district) s(time):q s(time):tmax
## para         1.0000000  0.02040816 0.2317413    0.3223674
## s(district)  1.0000000  1.00000000 0.2575195    0.3315370
## s(time):q    0.6399314  0.01471070 1.0000000    0.5965881
## s(time):tmax 0.9593538  0.02031868 0.7078390    1.0000000

TVCM: Model 9

Predictors: prcp, tmin

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_9 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = prcp, bs = "tp", k = 30) +
    s(time, by = tmin, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.69 sec elapsed
cat("\nConverged?", vivax_tvcm_9$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_9)
## 
## Family: Negative Binomial(0.837) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = prcp, bs = "tp", k = 30) + s(time, by = tmin, 
##     bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.7056     0.3289  -20.39   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df       F  p-value    
## s(district)  47.84 48.000 308.263  < 2e-16 ***
## s(time):prcp  5.65  6.787   4.975 2.26e-05 ***
## s(time):tmin 68.84 81.469  14.750  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.493   Deviance explained = 67.7%
## fREML =  14789  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_9, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 1.389883e-08 1.636942e-08 2.177923e-07
## 
## $hess
##              [,1]         [,2]        [,3]
## [1,] 23.515078667 -0.003024977 -0.07629346
## [2,] -0.003024977  1.088001137  0.10296537
## [3,] -0.076293460  0.102965374 23.71558050
## 
## Model rank =  180 / 180 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value  
## s(district)   49.00  47.84      NA      NA  
## s(time):prcp  30.00   5.65    0.84   0.015 *
## s(time):tmin 100.00  68.84    0.84   0.025 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mgcv::plot.gam(
  x          = vivax_tvcm_9, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_9)
##          para s(district) s(time):prcp s(time):tmin
## worst       1  1.00000000    0.9495201    0.9886612
## observed    1  0.12663975    0.8834001    0.6545112
## estimate    1  0.03869671    0.8920174    0.8931749
mgcv::concurvity(vivax_tvcm_9, full = FALSE)$estimate
##                   para s(district) s(time):prcp s(time):tmin
## para         1.0000000  0.02040816    0.2971182    0.3327393
## s(district)  1.0000000  1.00000000    0.3107984    0.3386034
## s(time):prcp 0.8070714  0.01738709    1.0000000    0.8520021
## s(time):tmin 0.9692789  0.02029111    0.8768477    1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_9 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = prcp, bs = "tp", k = 30) +
    s(time, by = tmin, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.78 sec elapsed
cat("\nConverged?", falciparum_tvcm_9$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_9)
## 
## Family: Negative Binomial(0.496) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = prcp, bs = "tp", k = 30) + s(time, by = tmin, 
##     bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.1803     0.3887  -21.04   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F p-value    
## s(district)  47.582 48.000 187.426  <2e-16 ***
## s(time):prcp  7.433  8.971   7.337  <2e-16 ***
## s(time):tmin 57.569 69.687   8.646  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.367   Deviance explained = 64.5%
## fREML =  13747  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_9, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 5.041353e-07 1.378462e-07 1.259055e-06
## 
## $hess
##              [,1]         [,2]        [,3]
## [1,] 22.865004984 -0.001410579 -0.08140897
## [2,] -0.001410579  1.659746451 -0.34199534
## [3,] -0.081408969 -0.341995341 14.33586510
## 
## Model rank =  180 / 180 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.58      NA      NA
## s(time):prcp  30.00   7.43    0.82    0.57
## s(time):tmin 100.00  57.57    0.82    0.54
mgcv::plot.gam(
  x          = falciparum_tvcm_9, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_9)
##          para s(district) s(time):prcp s(time):tmin
## worst       1  1.00000000    0.9495201    0.9886612
## observed    1  0.09182003    0.8881393    0.7222467
## estimate    1  0.03869671    0.8919952    0.8926087
mgcv::concurvity(falciparum_tvcm_9, full = FALSE)$estimate
##                   para s(district) s(time):prcp s(time):tmin
## para         1.0000000  0.02040816    0.2925201    0.3285027
## s(district)  1.0000000  1.00000000    0.3059347    0.3342968
## s(time):prcp 0.8070714  0.01738709    1.0000000    0.8520700
## s(time):tmin 0.9692789  0.02029111    0.8771001    1.0000000

TVCM: Model 10

Predictors: prcp, tmax

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_10 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = prcp, bs = "tp", k = 30) +
    s(time, by = tmax, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.08 sec elapsed
cat("\nConverged?", vivax_tvcm_10$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_10)
## 
## Family: Negative Binomial(0.841) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = prcp, bs = "tp", k = 30) + s(time, by = tmax, 
##     bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.2996     0.3301  -19.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df      F p-value    
## s(district)  47.83  48.00 269.75  <2e-16 ***
## s(time):prcp 11.68  14.08  12.81  <2e-16 ***
## s(time):tmax 67.03  79.72  12.33  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.483   Deviance explained = 67.9%
## fREML =  14788  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_10, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 9.338716e-09 2.790006e-08 1.704440e-07
## 
## $hess
##             [,1]        [,2]       [,3]
## [1,] 23.42986869 -0.01280447 -0.2989760
## [2,] -0.01280447  1.60238903 -0.3085009
## [3,] -0.29897601 -0.30850091 21.1112544
## 
## Model rank =  180 / 180 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                 k'   edf k-index p-value
## s(district)   49.0  47.8      NA      NA
## s(time):prcp  30.0  11.7    0.88    0.71
## s(time):tmax 100.0  67.0    0.88    0.71
mgcv::plot.gam(
  x          = vivax_tvcm_10, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_10)
##          para s(district) s(time):prcp s(time):tmax
## worst       1  1.00000000    0.9230594    0.9915651
## observed    1  0.30114341    0.8477614    0.8215732
## estimate    1  0.04119987    0.8548854    0.8369062
mgcv::concurvity(vivax_tvcm_10, full = FALSE)$estimate
##                   para s(district) s(time):prcp s(time):tmax
## para         1.0000000  0.02040816    0.2970109    0.3284053
## s(district)  1.0000000  1.00000000    0.3106873    0.3377823
## s(time):prcp 0.8070714  0.01738709    1.0000000    0.7651786
## s(time):tmax 0.9593538  0.02031868    0.8152860    1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_10 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) +
    s(district, bs = "re", k = 49) +
    s(time, by = prcp, bs = "tp", k = 30) +
    s(time, by = tmax, bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
## Warning in estimate.theta(theta, family, y, mu, scale = scale1, wt = G$w, : step
## failure in theta estimation

## Warning in estimate.theta(theta, family, y, mu, scale = scale1, wt = G$w, : step
## failure in theta estimation
tictoc::toc()
## GAM model fitting: 2.14 sec elapsed
cat("\nConverged?", falciparum_tvcm_10$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_10)
## 
## Family: Negative Binomial(0.501) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) + 
##     s(time, by = prcp, bs = "tp", k = 30) + s(time, by = tmax, 
##     bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.3161     0.3871   -18.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df       F p-value    
## s(district)  47.545  48.00 170.810  <2e-16 ***
## s(time):prcp  9.846  11.88  12.480  <2e-16 ***
## s(time):tmax 54.362  66.20   9.387  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.359   Deviance explained = 64.8%
## fREML =  13734  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_10, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 4.173538e-08 8.067642e-09 2.735690e-08
## 
## $hess
##              [,1]         [,2]       [,3]
## [1,] 22.701622809  0.004404026 -0.2479362
## [2,]  0.004404026  1.981886586 -0.2084873
## [3,] -0.247936218 -0.208487301 10.3205637
## 
## Model rank =  180 / 180 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(district)   49.00  47.54      NA      NA
## s(time):prcp  30.00   9.85    0.81    0.26
## s(time):tmax 100.00  54.36    0.81    0.23
mgcv::plot.gam(
  x          = falciparum_tvcm_10, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_10)
##          para s(district) s(time):prcp s(time):tmax
## worst       1  1.00000000    0.9230594    0.9915651
## observed    1  0.30310171    0.8717537    0.9064501
## estimate    1  0.04119987    0.8546375    0.8361792
mgcv::concurvity(falciparum_tvcm_10, full = FALSE)$estimate
##                   para s(district) s(time):prcp s(time):tmax
## para         1.0000000  0.02040816    0.2934553    0.3254020
## s(district)  1.0000000  1.00000000    0.3069260    0.3346757
## s(time):prcp 0.8070714  0.01738709    1.0000000    0.7652822
## s(time):tmax 0.9593538  0.02031868    0.8156583    1.0000000

Model selection

P. vivax

AIC(
  vivax_tvcm_0,
  vivax_tvcm_1,
  vivax_tvcm_2,
  vivax_tvcm_3,
  vivax_tvcm_4,
  vivax_tvcm_5,
  vivax_tvcm_6,
  vivax_tvcm_7,
  vivax_tvcm_8,
  vivax_tvcm_9,
  vivax_tvcm_10
) -> vivax_tvcm_aic
vivax_tvcms <- list(
  vivax_tvcm_0,
  vivax_tvcm_1,
  vivax_tvcm_2,
  vivax_tvcm_3,
  vivax_tvcm_4,
  vivax_tvcm_5,
  vivax_tvcm_6,
  vivax_tvcm_7,
  vivax_tvcm_8,
  vivax_tvcm_9,
  vivax_tvcm_10
)

n_models <- length(vivax_tvcms)
vivax_tvcm_dev <- rep(0, n_models)

for(i in 1:n_models) {
  vivax_tvcm_dev[i] <- deviance(vivax_tvcms[[i]])
}
eval_vivax_tvcm <- tibble::tibble(
  model    = row.names(vivax_tvcm_aic),
  df       = round(vivax_tvcm_aic$df, 2),
  aic      = round(vivax_tvcm_aic$AIC, 2),
  deviance = round(vivax_tvcm_dev, 2)
)
eval_vivax_tvcm$aicDiff <- round(AIC(vivax_tvcm_0) - eval_vivax_tvcm$aic, 2)
eval_vivax_tvcm
## # A tibble: 11 x 5
##    model            df    aic deviance aicDiff
##    <chr>         <dbl>  <dbl>    <dbl>   <dbl>
##  1 vivax_tvcm_0   161. 70161.   10410.    0   
##  2 vivax_tvcm_1   159. 70159.   10410.    1.69
##  3 vivax_tvcm_2   159. 70161.   10411.   -0.24
##  4 vivax_tvcm_3   131. 70780.   10491. -619.  
##  5 vivax_tvcm_4   143. 70476.   10442. -315.  
##  6 vivax_tvcm_5   130. 70783.   10491. -622.  
##  7 vivax_tvcm_6   143. 70475.   10442. -314.  
##  8 vivax_tvcm_7   127. 70798.   10487. -637.  
##  9 vivax_tvcm_8   128. 70809.   10479. -648.  
## 10 vivax_tvcm_9   126. 70797.   10488. -636.  
## 11 vivax_tvcm_10  131. 70752.   10470. -591.
# readr::write_csv(eval_vivax_tvcm, "eval_vivax_tvcm.csv")

P. falciparum

AIC(
  falciparum_tvcm_0,
  falciparum_tvcm_1,
  falciparum_tvcm_2,
  falciparum_tvcm_3,
  falciparum_tvcm_4,
  falciparum_tvcm_5,
  falciparum_tvcm_6,
  falciparum_tvcm_7,
  falciparum_tvcm_8,
  falciparum_tvcm_9,
  falciparum_tvcm_10
) -> falciparum_tvcm_aic
falciparum_tvcms <- list(
  falciparum_tvcm_0,
  falciparum_tvcm_1,
  falciparum_tvcm_2,
  falciparum_tvcm_3,
  falciparum_tvcm_4,
  falciparum_tvcm_5,
  falciparum_tvcm_6,
  falciparum_tvcm_7,
  falciparum_tvcm_8,
  falciparum_tvcm_9,
  falciparum_tvcm_10
)

n_models <- length(falciparum_tvcms)
falciparum_tvcm_dev <- rep(0, n_models)

for(i in 1:n_models) {
  falciparum_tvcm_dev[i] <- deviance(falciparum_tvcms[[i]])
}
eval_falciparum_tvcm <- tibble::tibble(
  model    = row.names(falciparum_tvcm_aic),
  df       = round(falciparum_tvcm_aic$df, 2),
  aic      = round(falciparum_tvcm_aic$AIC, 2),
  deviance = round(falciparum_tvcm_dev, 2)
)
eval_falciparum_tvcm$aicDiff <- round(AIC(falciparum_tvcm_0) - eval_falciparum_tvcm$aic, 2)
eval_falciparum_tvcm
## # A tibble: 11 x 5
##    model                 df    aic deviance aicDiff
##    <chr>              <dbl>  <dbl>    <dbl>   <dbl>
##  1 falciparum_tvcm_0   150. 45901.    8423.    0   
##  2 falciparum_tvcm_1   148. 45902.    8425.   -1.43
##  3 falciparum_tvcm_2   148. 45905.    8425.   -4.51
##  4 falciparum_tvcm_3   123. 46208.    8486. -307.  
##  5 falciparum_tvcm_4   126. 46088.    8454. -187.  
##  6 falciparum_tvcm_5   122. 46214.    8486. -313.  
##  7 falciparum_tvcm_6   126. 46091.    8454. -190.  
##  8 falciparum_tvcm_7   117. 46269.    8492. -368.  
##  9 falciparum_tvcm_8   115. 46234.    8484. -333.  
## 10 falciparum_tvcm_9   117. 46270.    8492. -370.  
## 11 falciparum_tvcm_10  116. 46206.    8477. -305.
# readr::write_csv(eval_falciparum_tvcm, "eval_falciparum_tvcm.csv")

Figure 2

df_pamafro_period <- tibble::tibble(
  start = lubridate::make_datetime(year = 2005, month = 10, day = 1L), 
  end   = lubridate::make_datetime(year = 2010, month = 09, day = 1L)
)
tvcm_5_terms <- c(
  "Actual Evapotranspiration",
  "Precipitation", 
  "Min Temperature"
)
get_tvcoef <- function(plot_vivax, plot_falciparum, labels) {
  nplots_vivax      <- length(plot_vivax)
  nplots_falciparum <- length(plot_falciparum)
  
  assertthat::are_equal(nplots_vivax, nplots_falciparum)
  
  nplots <- nplots_vivax
  
  tvcoef_vivax <-  vector(mode = "list", length = nplots - 1)
  
  for (i in 2:nplots) {
    tvcoef_vivax[[(i-1)]] <- 
      tibble::tibble(
        x        = lubridate::as_datetime(plot_vivax[[i]]$x),
        se       = plot_vivax[[i]]$se,
        fit      = plot_vivax[[i]]$fit,
        term     = labels[(i-1)],
        specie = "vivax"
      )
  }
  
  df_tvcoef_vivax <- dplyr::bind_rows(tvcoef_vivax)
  
  tvcoef_falciparum <-  vector(mode = "list", length = nplots - 1)
  
  for (i in 2:nplots) {
    tvcoef_falciparum[[(i-1)]] <- 
      tibble::tibble(
        x        = lubridate::as_datetime(plot_falciparum[[i]]$x),
        se       = plot_falciparum[[i]]$se,
        fit      = plot_falciparum[[i]]$fit,
        term     = labels[(i-1)],
        specie   = "falciparum"
      )
  }
  
  df_tvcoef_falciparum <- dplyr::bind_rows(tvcoef_falciparum)
  
  df_tvcoef <- dplyr::bind_rows(df_tvcoef_vivax, df_tvcoef_falciparum)
  
  df_tvcoef$term   <- factor(df_tvcoef$term, labels)
  df_tvcoef$specie <- factor(
    x      = df_tvcoef$specie, 
    levels = c("vivax", "falciparum"), 
    labels = c("P. vivax", "P. falciparum")
  )
  
  df_tvcoef
}
plot_tvcm_by_terms <- function(df_tvcm) {
  
  p <- ggplot(df_tvcm)
  
  # Add grid 
  p <- p + facet_wrap(~ term, ncol = 1, scales = "free", strip = "top")
  
  # Add lines
  p <- p + geom_line(mapping = aes(x = x, y = fit, color = specie), size = 1)
  
  # Add confidence intervals
  p <- p + geom_ribbon(
    mapping = aes(
      x    = x,
      y    = fit,
      ymin = fit + se,
      ymax = fit - se,
      fill = specie
    ),
    alpha = 0.1
  )
  
  # Add PAMAFRO's period layer
  p <- p + geom_rect(
    data    = df_pamafro_period,
    mapping = aes(xmin = start, xmax = end), 
    ymin    = -100, 
    ymax    = 100, 
    alpha   = 0.2
  ) 
    
  # Add PAMAFRO's period lines
  p <- p + geom_vline(
    xintercept = c(df_pamafro_period$start, df_pamafro_period$end), 
    linetype   = "dashed", 
    alpha      = 0.6
  )
  
  # Add y = 0 line
  p <- p + geom_hline(mapping = aes(yintercept = 0), alpha = 0.6)
  
  # Scale date
  p <- p + scale_x_datetime(date_labels = "%Y", date_breaks = "1 year")
  
  # Add labels
  p <- p + labs(y = "b(t)", x = NULL)

  # Format layer
  p <- p + theme(legend.position = "top", legend.title = element_blank())
  
  show(p)
}
df_tvcm_5<- get_tvcoef(
  plot_vivax_tvcm_5, 
  plot_falciparum_tvcm_5, 
  tvcm_5_terms
)
plot_tvcm_by_terms(df_tvcm_5)

Table 2

get_period <- function(x) {
  ifelse(
    x < df_pamafro_period$start,
    "before",
    ifelse(
      x > df_pamafro_period$end,
      "after",
      "during"
    )
  )
}
get_ci <- function(x) {
  sprintf("[%s, %s]", round(x[1] - x[2], 2), round(x[1] + x[2], 2))
}
table_02_sketch = htmltools::withTags(
  table(
    class = "display",
    thead(
      tr(
        th(rowspan = 3, "Climate variable"),
        th(colspan = 4, "Before"),
        th(colspan = 4, "During"),
        th(colspan = 4, "After")
      ),
      tr(
        lapply(rep(c("Min", "Max"), 3), th, colspan = 2)
      ),
      tr(
        lapply(rep(c("Estimate", "95% CI"), 6), th)
      )
    )
  )
)
df_tvcm_5$period <- apply(df_tvcm_5[, c("x")], 1, get_period)

df_tvcm_5$period <- factor(
  x      = df_tvcm_5$period,
  levels = c("before", "during", "after")
)
df_tvcm_5 %>% 
  dplyr::group_by(term, period, specie) %>% 
  dplyr::slice_max(fit, n = 1) -> df_tvcm_5_max
df_tvcm_5 %>% 
  dplyr::group_by(term, period, specie) %>% 
  dplyr::slice_min(fit, n = 1) -> df_tvcm_5_min
df_tvcm_5_max$extrema <- rep("max", length(df_tvcm_5_max$fit))
df_tvcm_5_min$extrema <- rep("min", length(df_tvcm_5_min$fit))

df_tvcm_5_extrema <- dplyr::bind_rows(df_tvcm_5_max, df_tvcm_5_min)
df_tvcm_5_extrema$ci <- apply(df_tvcm_5_extrema[, c("fit", "se")], 1, get_ci)

P. vivax

df_tvcm_5_extrema %>% 
  dplyr::filter(specie == "P. vivax") %>% 
  dplyr::select(term, period, extrema, fit, ci) %>%
  dplyr::mutate(fit = round(fit, 4)) %>% 
  tidyr::pivot_wider(
    id_cols     = term,
    names_from  = c(period, extrema),
    values_from = c(fit, ci),
    names_sep   = "_"
  ) %>% 
  dplyr::select(
    term, 
    fit_before_min,
    ci_before_min,
    fit_before_max,
    ci_before_max,
    fit_during_min,
    ci_during_min, 
    fit_during_max,
    ci_during_max,
    fit_after_min,
    ci_after_min, 
    fit_after_max,
    ci_after_max
  ) -> df_tvcm_5_extrema_vivax
df_tvcm_5_extrema_vivax %>% 
  tibble::column_to_rownames(var = "term") %>% 
  DT::datatable(
    container  = table_02_sketch,
    extensions = 'Buttons',
    options    = list(
      dom        = "Bt", 
      scrollX    = TRUE,
      columnDefs = list(list(className = 'dt-center', targets = 6)),
      buttons    = c('copy', 'csv')
    ),
    caption    = "P. vivax"
  )

P. falciparum

df_tvcm_5_extrema %>% 
  dplyr::filter(specie == "P. falciparum") %>% 
  dplyr::select(term, period, extrema, fit, ci) %>%
  dplyr::mutate(fit = round(fit, 4)) %>% 
  tidyr::pivot_wider(
    id_cols     = term,
    names_from  = c(period, extrema),
    values_from = c(fit, ci),
    names_sep   = "_"
  ) %>% 
  dplyr::select(
    term, 
    fit_before_min,
    ci_before_min,
    fit_before_max,
    ci_before_max,
    fit_during_min,
    ci_during_min, 
    fit_during_max,
    ci_during_max,
    fit_after_min,
    ci_after_min, 
    fit_after_max,
    ci_after_max
  ) -> df_tvcm_5_extrema_falciparum
df_tvcm_5_extrema_falciparum %>% 
  tibble::column_to_rownames(var = "term") %>% 
  DT::datatable(
    container  = table_02_sketch,
    extensions = 'Buttons',
    options    = list(
      dom        = "Bt", 
      scrollX    = TRUE,
      columnDefs = list(list(className = 'dt-center', targets = 6)),
      buttons    = c('copy', 'csv')
    ),
    caption    = "P. falciparum"
  )
df_tvcm_5 %>% 
  dplyr::group_by(period, specie, term) %>% 
  #dplyr::arrange(x) %>% 
  dplyr::filter(
    fit == min(fit) | fit == max(fit)
  )
## # A tibble: 36 x 6
## # Groups:   period, specie, term [18]
##    x                      se fit[,1] term                      specie   period
##    <dttm>              <dbl>   <dbl> <fct>                     <fct>    <fct> 
##  1 2001-01-01 00:00:00 0.939 -0.883  Actual Evapotranspiration P. vivax before
##  2 2005-09-30 07:21:31 0.445  0.310  Actual Evapotranspiration P. vivax before
##  3 2005-12-25 23:19:35 0.446  0.314  Actual Evapotranspiration P. vivax during
##  4 2010-08-30 12:24:31 0.482 -0.623  Actual Evapotranspiration P. vivax during
##  5 2012-11-24 15:32:06 0.492 -0.936  Actual Evapotranspiration P. vivax after 
##  6 2017-12-01 00:00:00 0.787  0.657  Actual Evapotranspiration P. vivax after 
##  7 2001-01-01 00:00:00 0.979 -1.90   Precipitation             P. vivax before
##  8 2005-09-30 07:21:31 0.356 -0.0146 Precipitation             P. vivax before
##  9 2006-06-17 07:15:45 0.350  0.0484 Precipitation             P. vivax during
## 10 2010-08-30 12:24:31 0.635 -0.867  Precipitation             P. vivax during
## # ... with 26 more rows

The estimated time-varying coefficients of the climate variables for both models, as well as their approximated 95% confidence interval in each time point on the graph, are shown in Figure 2. As mentioned before, this time-varying coefficients are estimated as smooth functions over time. Although the confidence intervals include the zero most of the time in each case, the global contribution of the estimated smooth functions in the models is significant according to the approximated p-values in the summary table of the fitted model (APPENDIX). Something to highlight is the apparent concurrent drop to a more negative linear contribution of the climate variables at the end of the PAMAFRO intervention. The estimated confidence intervals for the coefficients at the end and some time after the PAMAFRO period are also farther away from zero than anytime in the study. This is more evident in the case of minimum temperature, where a significant drop is visible, for both P. vivax and P. falciparum. In the case of precipitation, this pattern is less evident for P.vivax, and for P. falciparum there seems to be a periodicity in this drop down which casually matches with the end of PAMAFRO intervention.

Maximum and minimum values of the estimated time-varying coefficients for each model before, during, and after the PAMAFRO intervention, including their 95% confidence intervals, are presented in Table 2. Before the intervention

P. vivax

Use max and min

  • Actual evapotranspiration

Before PAMAFRO, the actual evapotranspiration effect’s had a change of direction from being negative to being positive, going from -0.93 (95% CI: []) at the begining of this period to 0.24 at the end, which means a 74% decrease in the effect’s magnitude. During PAMAFRO, an opposite change of direction occurred, going from 0.24 to -0.67, which means a 177% increase in the effect’s magnitude.

After PAMAFRO, a change in direction in the effect similar to that of the first period happened, going from -0.68 to 0.63, which means a 7% decrease in the effect’s magnitude.

  • Precipitation

Before PAMAFRO, the precipitation had a negative effect throughout this period, but its magnitude decreased by 98%, going from -1.81 (CI) to -0.04 (CI).

During PAMAFRO, the precipitation continued to have a negative effect on the incidence, but it increased its magnitude 17 times, going from -0.03 (CI) to -0.53 (CI).

After PAMAFRO, a change in direction in the effect similar to that of the first period happened, going from -0.68 to 0.63, which means a 7% decrease in the effect’s magnitude.

  • Min temperature

Distributed lag models with time-varying coefficients

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvc_dlm_5 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) + 
    s(district, bs = "re") +
    s(time, by = aet,         bs = "tp", k = 50) +
    s(time, by = aet_lag1,    bs = "tp", k = 50) +
    s(time, by = aet_lag3,    bs = "tp", k = 50) +
    s(time, by = aet_lag6,    bs = "tp", k = 50) +
    s(time, by = aet_lag12,   bs = "tp", k = 50) +
    s(time, by = prcp,        bs = "tp", k = 30) +
    s(time, by = prcp_lag1,   bs = "tp", k = 30) +
    s(time, by = prcp_lag3,   bs = "tp", k = 30) +
    s(time, by = prcp_lag6,   bs = "tp", k = 30) +
    s(time, by = prcp_lag12,  bs = "tp", k = 30) +
    s(time, by = tmin,        bs = "tp", k = 100) +
    s(time, by = tmin_lag1,   bs = "tp", k = 100) +
    s(time, by = tmin_lag3,   bs = "tp", k = 100) +
    s(time, by = tmin_lag6,   bs = "tp", k = 100) +
    s(time, by = tmin_lag12,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 18.39 sec elapsed
cat("\nConverged?", vivax_tvc_dlm_5$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvc_dlm_5)
## 
## Family: Negative Binomial(0.87) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re") + s(time, 
##     by = aet, bs = "tp", k = 50) + s(time, by = aet_lag1, bs = "tp", 
##     k = 50) + s(time, by = aet_lag3, bs = "tp", k = 50) + s(time, 
##     by = aet_lag6, bs = "tp", k = 50) + s(time, by = aet_lag12, 
##     bs = "tp", k = 50) + s(time, by = prcp, bs = "tp", k = 30) + 
##     s(time, by = prcp_lag1, bs = "tp", k = 30) + s(time, by = prcp_lag3, 
##     bs = "tp", k = 30) + s(time, by = prcp_lag6, bs = "tp", k = 30) + 
##     s(time, by = prcp_lag12, bs = "tp", k = 30) + s(time, by = tmin, 
##     bs = "tp", k = 100) + s(time, by = tmin_lag1, bs = "tp", 
##     k = 100) + s(time, by = tmin_lag3, bs = "tp", k = 100) + 
##     s(time, by = tmin_lag6, bs = "tp", k = 100) + s(time, by = tmin_lag12, 
##     bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.0038     0.4249  -16.48   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                       edf Ref.df       F  p-value    
## s(district)        47.832 48.000 292.630  < 2e-16 ***
## s(time):aet         2.904  3.317   0.329  0.74052    
## s(time):aet_lag1    3.765  4.363   2.410  0.03826 *  
## s(time):aet_lag3    8.352  9.868   3.980 2.02e-05 ***
## s(time):aet_lag6   14.249 16.961   4.343  < 2e-16 ***
## s(time):aet_lag12   3.397  3.904   3.431  0.00808 ** 
## s(time):prcp        6.067  7.276   2.956  0.00448 ** 
## s(time):prcp_lag1   5.134  6.138   5.504 9.58e-06 ***
## s(time):prcp_lag3   3.553  4.158   2.253  0.06032 .  
## s(time):prcp_lag6   2.217  2.387   2.284  0.06635 .  
## s(time):prcp_lag12  2.000  2.000   3.452  0.03172 *  
## s(time):tmin        2.000  2.001   2.107  0.12162    
## s(time):tmin_lag1   7.180  8.562   5.319  < 2e-16 ***
## s(time):tmin_lag3   2.000  2.000   4.501  0.01112 *  
## s(time):tmin_lag6  49.890 61.147   3.579  < 2e-16 ***
## s(time):tmin_lag12  4.317  5.071   2.763  0.01687 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.492   Deviance explained = 68.7%
## fREML =  14848  Scale est. = 1         n = 9996

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvc_dlm_5 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) + 
    s(district, bs = "re") +
    s(time, by = aet,         bs = "tp", k = 50) +
    s(time, by = aet_lag1,    bs = "tp", k = 50) +
    s(time, by = aet_lag3,    bs = "tp", k = 50) +
    s(time, by = aet_lag6,    bs = "tp", k = 50) +
    s(time, by = aet_lag12,   bs = "tp", k = 50) +
    s(time, by = prcp,        bs = "tp", k = 30) +
    s(time, by = prcp_lag1,   bs = "tp", k = 30) +
    s(time, by = prcp_lag3,   bs = "tp", k = 30) +
    s(time, by = prcp_lag6,   bs = "tp", k = 30) +
    s(time, by = prcp_lag12,  bs = "tp", k = 30) +
    s(time, by = tmin,        bs = "tp", k = 100) +
    s(time, by = tmin_lag1,   bs = "tp", k = 100) +
    s(time, by = tmin_lag3,   bs = "tp", k = 100) +
    s(time, by = tmin_lag6,   bs = "tp", k = 100) +
    s(time, by = tmin_lag12,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 17.29 sec elapsed
cat("\nConverged?", falciparum_tvc_dlm_5$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvc_dlm_5)
## 
## Family: Negative Binomial(0.525) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re") + s(time, 
##     by = aet, bs = "tp", k = 50) + s(time, by = aet_lag1, bs = "tp", 
##     k = 50) + s(time, by = aet_lag3, bs = "tp", k = 50) + s(time, 
##     by = aet_lag6, bs = "tp", k = 50) + s(time, by = aet_lag12, 
##     bs = "tp", k = 50) + s(time, by = prcp, bs = "tp", k = 30) + 
##     s(time, by = prcp_lag1, bs = "tp", k = 30) + s(time, by = prcp_lag3, 
##     bs = "tp", k = 30) + s(time, by = prcp_lag6, bs = "tp", k = 30) + 
##     s(time, by = prcp_lag12, bs = "tp", k = 30) + s(time, by = tmin, 
##     bs = "tp", k = 100) + s(time, by = tmin_lag1, bs = "tp", 
##     k = 100) + s(time, by = tmin_lag3, bs = "tp", k = 100) + 
##     s(time, by = tmin_lag6, bs = "tp", k = 100) + s(time, by = tmin_lag12, 
##     bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -7.774      0.529   -14.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                       edf Ref.df       F  p-value    
## s(district)        47.568 48.000 186.801  < 2e-16 ***
## s(time):aet         5.578  6.630   1.864 0.076865 .  
## s(time):aet_lag1   10.996 13.021   2.874 0.000367 ***
## s(time):aet_lag3    3.130  3.632  25.079  < 2e-16 ***
## s(time):aet_lag6    9.631 11.417   8.143  < 2e-16 ***
## s(time):aet_lag12   2.000  2.000  17.044  < 2e-16 ***
## s(time):prcp        3.095  3.606   3.463 0.007761 ** 
## s(time):prcp_lag1   4.278  5.094   5.974 1.45e-05 ***
## s(time):prcp_lag3   2.580  2.936   2.592 0.066192 .  
## s(time):prcp_lag6   2.000  2.000   3.654 0.025928 *  
## s(time):prcp_lag12  7.478  8.984   2.352 0.012928 *  
## s(time):tmin        2.001  2.001   0.274 0.760360    
## s(time):tmin_lag1   7.726  9.224   5.876  < 2e-16 ***
## s(time):tmin_lag3   2.000  2.000   5.857 0.002870 ** 
## s(time):tmin_lag6  15.847 18.856   4.286  < 2e-16 ***
## s(time):tmin_lag12 10.520 12.566   2.848 0.000524 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.382   Deviance explained = 66.1%
## fREML =  13735  Scale est. = 1         n = 9996

Figure 3

tvc_dlm_5_terms <- c(
  "Actual Evapotranspiration (lag = 0)",
  "Actual Evapotranspiration (lag = 1)",
  "Actual Evapotranspiration (lag = 3)",
  "Actual Evapotranspiration (lag = 6)",
  "Actual Evapotranspiration (lag = 12)",
  "Precipitation (lag = 0)",
  "Precipitation (lag = 1)",
  "Precipitation (lag = 3)",
  "Precipitation (lag = 6)",
  "Precipitation (lag = 12)",
  "Min Temperature (lag = 0)",
  "Min Temperature (lag = 1)",
  "Min Temperature (lag = 3)",
  "Min Temperature (lag = 6)",
  "Min Temperature (lag = 12)"
)
df_tvc_dlm_5 <- get_tvcoef(
  plot_vivax_tvc_dlm_5, 
  plot_falciparum_tvc_dlm_5, 
  tvc_dlm_5_terms
)
df_tvc_dlm_5
## # A tibble: 15,000 x 5
##    x                      se fit[,1] term                                specie 
##    <dttm>              <dbl>   <dbl> <fct>                               <fct>  
##  1 2001-01-01 00:00:00 0.859  0.0203 Actual Evapotranspiration (lag = 0) P. viv~
##  2 2001-01-13 09:08:17 0.853  0.0203 Actual Evapotranspiration (lag = 0) P. viv~
##  3 2001-01-25 18:16:35 0.847  0.0204 Actual Evapotranspiration (lag = 0) P. viv~
##  4 2001-02-07 03:24:53 0.841  0.0204 Actual Evapotranspiration (lag = 0) P. viv~
##  5 2001-02-19 12:33:11 0.835  0.0205 Actual Evapotranspiration (lag = 0) P. viv~
##  6 2001-03-03 21:41:28 0.829  0.0205 Actual Evapotranspiration (lag = 0) P. viv~
##  7 2001-03-16 06:49:46 0.823  0.0205 Actual Evapotranspiration (lag = 0) P. viv~
##  8 2001-03-28 15:58:04 0.817  0.0206 Actual Evapotranspiration (lag = 0) P. viv~
##  9 2001-04-10 01:06:22 0.811  0.0206 Actual Evapotranspiration (lag = 0) P. viv~
## 10 2001-04-22 10:14:40 0.805  0.0207 Actual Evapotranspiration (lag = 0) P. viv~
## # ... with 14,990 more rows
df_tvc_dlm_5$variable <- factor(
  stringr::str_extract(df_tvc_dlm_5$term, ".*(?=\\s\\()"),
  levels = c("Actual Evapotranspiration", "Precipitation", "Min Temperature")
)

df_tvc_dlm_5$lag <- factor(
  stringr::str_extract(df_tvc_dlm_5$term, "(?<=\\().*(?=\\))"),
  levels = c("lag = 0", "lag = 1", "lag = 3", "lag = 6", "lag = 12")
)
df_tvc_dlm_5 %>% 
  ggplot2::ggplot() +
  facet_grid(cols = vars(specie), rows = vars(variable), scales = "free") +
  geom_line(mapping = aes(x = x, y = fit, color = lag), size = 1) +
  geom_ribbon(
    mapping = aes(
      x    = x,
      y    = fit,
      ymin = fit + se,
      ymax = fit - se,
      fill = lag
    ),
    alpha = 0.1
  ) +
  geom_rect(
    data    = df_pamafro_period,
    mapping = aes(xmin = start, xmax = end), 
    ymin    = -60, 
    ymax    = 60, 
    alpha   = 0.2
  ) +
  geom_hline(mapping  = aes(yintercept = 0), alpha = 0.6) +
  geom_vline(
    xintercept = c(df_pamafro_period$start, df_pamafro_period$end),
    linetype   = 'dashed',
    alpha      = 0.8
  ) +
  scale_x_datetime(date_labels = "%Y", date_breaks = "2 year") +
  ggsci::scale_color_npg() +
  ggsci::scale_fill_npg() +
  ylab("b(t)") +
  xlab(NULL) +
  theme(legend.position = "top", legend.title = element_blank())

Lagged variables models

1 month

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_5_lag1 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) + 
    s(district, bs = "re") +
    s(time, by = aet_lag1,   bs = "tp", k = 50) +
    s(time, by = prcp_lag1,  bs = "tp", k = 30) +
    s(time, by = tmin_lag1,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.06 sec elapsed
cat("\nConverged?", vivax_tvcm_5_lag1$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5_lag1)
## 
## Family: Negative Binomial(0.843) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re") + s(time, 
##     by = aet_lag1, bs = "tp", k = 50) + s(time, by = prcp_lag1, 
##     bs = "tp", k = 30) + s(time, by = tmin_lag1, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.0157     0.3356  -20.91   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                      edf Ref.df       F p-value    
## s(district)       47.842 48.000 307.923 < 2e-16 ***
## s(time):aet_lag1   6.578  7.827   6.211 < 2e-16 ***
## s(time):prcp_lag1  2.001  2.002  11.054 1.6e-05 ***
## s(time):tmin_lag1 67.927 80.705   8.719 < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.492   Deviance explained = 67.9%
## fREML =  14795  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_5_lag1, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1]  3.310003e-09  2.864826e-08 -2.549034e-05  3.080321e-08
## 
## $hess
##               [,1]          [,2]          [,3]          [,4]
## [1,]  2.351599e+01 -5.844994e-03 -2.001069e-06 -1.864916e-01
## [2,] -5.844994e-03  1.244763e+00  3.129586e-06 -2.405453e-01
## [3,] -2.001069e-06  3.129586e-06  2.523820e-05  3.656213e-05
## [4,] -1.864916e-01 -2.405453e-01  3.656213e-05  1.947748e+01
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                       k'    edf k-index p-value
## s(district)        49.00  47.84      NA      NA
## s(time):aet_lag1   50.00   6.58    0.89    0.92
## s(time):prcp_lag1  30.00   2.00    0.89    0.92
## s(time):tmin_lag1 100.00  67.93    0.89    0.92
mgcv::plot.gam(
  x          = vivax_tvcm_5_lag1, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_5_lag1)
##          para s(district) s(time):aet_lag1 s(time):prcp_lag1 s(time):tmin_lag1
## worst       1  1.00000000        0.9810728         0.9745491         0.9911320
## observed    1  0.13494760        0.9586554         0.8836913         0.9349924
## estimate    1  0.04423898        0.9619496         0.8996287         0.9685064
mgcv::concurvity(vivax_tvcm_5_lag1, full = FALSE)$estimate
##                        para s(district) s(time):aet_lag1 s(time):prcp_lag1
## para              1.0000000  0.02040816        0.3300338         0.2947308
## s(district)       1.0000000  1.00000000        0.3311771         0.3075222
## s(time):aet_lag1  0.9659814  0.01989962        1.0000000         0.8075944
## s(time):prcp_lag1 0.8066758  0.01736603        0.7405954         1.0000000
## s(time):tmin_lag1 0.9689610  0.02028509        0.9511616         0.8776304
##                   s(time):tmin_lag1
## para                      0.3318437
## s(district)               0.3377886
## s(time):aet_lag1          0.9379776
## s(time):prcp_lag1         0.8498165
## s(time):tmin_lag1         1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_5_lag1 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) + 
    s(district, bs = "re") +
    s(time, by = aet_lag1,   bs = "tp", k = 50) +
    s(time, by = prcp_lag1,  bs = "tp", k = 30) +
    s(time, by = tmin_lag1,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.25 sec elapsed
cat("\nConverged?", falciparum_tvcm_5_lag1$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_5_lag1)
## 
## Family: Negative Binomial(0.505) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re") + s(time, 
##     by = aet_lag1, bs = "tp", k = 50) + s(time, by = prcp_lag1, 
##     bs = "tp", k = 30) + s(time, by = tmin_lag1, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.3965     0.4008  -20.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                     edf Ref.df       F p-value    
## s(district)       47.58 49.000 184.966  <2e-16 ***
## s(time):aet_lag1   6.50  7.681  11.307  <2e-16 ***
## s(time):prcp_lag1  8.31 10.032   6.709  <2e-16 ***
## s(time):tmin_lag1 57.11 69.304   5.287  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.353   Deviance explained =   65%
## fREML =  13756  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_5_lag1, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -1.193661e-06 -6.655265e-08 -8.424542e-08 -6.779682e-07
## 
## $hess
##              [,1]         [,2]        [,3]       [,4]
## [1,] 22.881522614 -0.009027759  0.00271401 -0.1606189
## [2,] -0.009027759  0.966148434  0.07169012  0.1547939
## [3,]  0.002714010  0.071690120  1.45472250 -0.3079749
## [4,] -0.160618933  0.154793945 -0.30797488 11.4015357
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                       k'    edf k-index p-value
## s(district)        49.00  47.58      NA      NA
## s(time):aet_lag1   50.00   6.50    0.84    0.59
## s(time):prcp_lag1  30.00   8.31    0.84    0.61
## s(time):tmin_lag1 100.00  57.11    0.84    0.64
mgcv::plot.gam(
  x          = falciparum_tvcm_5_lag1, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_5_lag1)
##          para s(district) s(time):aet_lag1 s(time):prcp_lag1 s(time):tmin_lag1
## worst       1  1.00000000        0.9810728         0.9745491         0.9911320
## observed    1  0.09841707        0.9590690         0.9045041         0.9505211
## estimate    1  0.04423898        0.9619717         0.8995819         0.9685086
mgcv::concurvity(falciparum_tvcm_5_lag1, full = FALSE)$estimate
##                        para s(district) s(time):aet_lag1 s(time):prcp_lag1
## para              1.0000000  0.02040816        0.3310103         0.2961423
## s(district)       1.0000000  1.00000000        0.3321567         0.3090055
## s(time):aet_lag1  0.9659814  0.01989962        1.0000000         0.8073822
## s(time):prcp_lag1 0.8066758  0.01736603        0.7404622         1.0000000
## s(time):tmin_lag1 0.9689610  0.02028509        0.9511435         0.8774920
##                   s(time):tmin_lag1
## para                      0.3321669
## s(district)               0.3381185
## s(time):aet_lag1          0.9379661
## s(time):prcp_lag1         0.8497555
## s(time):tmin_lag1         1.0000000

Plots

tvcm_5_lag1_terms <- c(
  "Actual Evapotranspiration (Lag = 1)",
  "Precipitation (Lag = 1)", 
  "Min Temperature (Lag = 1)"
)
df_tvcm_5_lag1 <- get_tvcoef(
  plot_vivax_tvcm_5_lag1, 
  plot_falciparum_tvcm_5_lag1, 
  tvcm_5_lag1_terms
)
plot_tvcm_by_terms(df_tvcm_5_lag1)

3 months

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_5_lag3 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) + 
    s(district, bs = "re") +
    s(time, by = aet_lag3,   bs = "tp", k = 50) +
    s(time, by = prcp_lag3,  bs = "tp", k = 30) +
    s(time, by = tmin_lag3,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.17 sec elapsed
cat("\nConverged?", vivax_tvcm_5_lag3$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5_lag3)
## 
## Family: Negative Binomial(0.844) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re") + s(time, 
##     by = aet_lag3, bs = "tp", k = 50) + s(time, by = prcp_lag3, 
##     bs = "tp", k = 30) + s(time, by = tmin_lag3, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.2658     0.3336  -21.78   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                      edf Ref.df       F p-value    
## s(district)       47.840 48.000 306.518 < 2e-16 ***
## s(time):aet_lag3  11.350 13.611   5.150 < 2e-16 ***
## s(time):prcp_lag3  3.076  3.569   4.222 0.00533 ** 
## s(time):tmin_lag3 68.537 81.279   7.143 < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.489   Deviance explained = 67.9%
## fREML =  14813  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_5_lag3, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -1.865702e-08 -3.338955e-07 -8.264585e-09 -9.960136e-07
## 
## $hess
##              [,1]         [,2]         [,3]       [,4]
## [1,] 23.511996247 -0.013328885 -0.005484303 -0.2294394
## [2,] -0.013328885  1.793623635 -0.009030326 -0.2093404
## [3,] -0.005484303 -0.009030326  0.085106505  0.2407554
## [4,] -0.229439444 -0.209340353  0.240755368 15.4258222
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                       k'    edf k-index p-value
## s(district)        49.00  47.84      NA      NA
## s(time):aet_lag3   50.00  11.35    0.86    0.40
## s(time):prcp_lag3  30.00   3.08    0.86    0.35
## s(time):tmin_lag3 100.00  68.54    0.86    0.37
mgcv::plot.gam(
  x          = vivax_tvcm_5_lag3, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_5_lag3)
##          para s(district) s(time):aet_lag3 s(time):prcp_lag3 s(time):tmin_lag3
## worst       1  1.00000000        0.9810063         0.9410134         0.9909274
## observed    1  0.13019926        0.9524503         0.8867507         0.9365577
## estimate    1  0.04424801        0.9613851         0.8968900         0.9676146
mgcv::concurvity(vivax_tvcm_5_lag3, full = FALSE)$estimate
##                        para s(district) s(time):aet_lag3 s(time):prcp_lag3
## para              1.0000000  0.02040816        0.3288539         0.2950539
## s(district)       1.0000000  1.00000000        0.3300087         0.3073334
## s(time):aet_lag3  0.9663080  0.01990851        1.0000000         0.8053587
## s(time):prcp_lag3 0.8038787  0.01732113        0.7184329         1.0000000
## s(time):tmin_lag3 0.9688780  0.02028480        0.9495266         0.8744118
##                   s(time):tmin_lag3
## para                      0.3324783
## s(district)               0.3385703
## s(time):aet_lag3          0.9382147
## s(time):prcp_lag3         0.8375486
## s(time):tmin_lag3         1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_5_lag3 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) + 
    s(district, bs = "re") +
    s(time, by = aet_lag3,   bs = "tp", k = 50) +
    s(time, by = prcp_lag3,  bs = "tp", k = 30) +
    s(time, by = tmin_lag3,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.12 sec elapsed
cat("\nConverged?", falciparum_tvcm_5_lag3$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_5_lag3)
## 
## Family: Negative Binomial(0.502) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re") + s(time, 
##     by = aet_lag3, bs = "tp", k = 50) + s(time, by = prcp_lag3, 
##     bs = "tp", k = 30) + s(time, by = tmin_lag3, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.6774     0.3963   -21.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                      edf Ref.df       F  p-value    
## s(district)       47.572 48.000 186.424  < 2e-16 ***
## s(time):aet_lag3   7.063  8.364  12.027  < 2e-16 ***
## s(time):prcp_lag3  7.705  9.264   3.219 0.000561 ***
## s(time):tmin_lag3 52.876 64.661   4.865  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.374   Deviance explained = 64.8%
## fREML =  13746  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_5_lag3, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 2.149497e-08 1.389648e-07 1.036213e-07 8.450160e-07
## 
## $hess
##               [,1]         [,2]         [,3]        [,4]
## [1,] 22.8817615420 0.0007673148 -0.002532685 -0.21035931
## [2,]  0.0007673148 0.2331692112  0.060018827  0.03025332
## [3,] -0.0025326852 0.0600188275  0.638429428 -0.01355408
## [4,] -0.2103593129 0.0302533204 -0.013554075  6.75182477
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                       k'    edf k-index p-value
## s(district)        49.00  47.57      NA      NA
## s(time):aet_lag3   50.00   7.06    0.82    0.46
## s(time):prcp_lag3  30.00   7.70    0.82    0.43
## s(time):tmin_lag3 100.00  52.88    0.82    0.38
mgcv::plot.gam(
  x          = falciparum_tvcm_5_lag3, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_5_lag3)
##          para s(district) s(time):aet_lag3 s(time):prcp_lag3 s(time):tmin_lag3
## worst       1  1.00000000        0.9810063         0.9410134         0.9909274
## observed    1  0.09225860        0.9583952         0.8787201         0.9647161
## estimate    1  0.04424801        0.9614316         0.8968682         0.9676456
mgcv::concurvity(falciparum_tvcm_5_lag3, full = FALSE)$estimate
##                        para s(district) s(time):aet_lag3 s(time):prcp_lag3
## para              1.0000000  0.02040816        0.3308493         0.2975150
## s(district)       1.0000000  1.00000000        0.3320115         0.3099293
## s(time):aet_lag3  0.9663080  0.01990851        1.0000000         0.8050513
## s(time):prcp_lag3 0.8038787  0.01732113        0.7183098         1.0000000
## s(time):tmin_lag3 0.9688780  0.02028480        0.9494990         0.8742286
##                   s(time):tmin_lag3
## para                      0.3338450
## s(district)               0.3399601
## s(time):aet_lag3          0.9381868
## s(time):prcp_lag3         0.8374833
## s(time):tmin_lag3         1.0000000

Plots

tvcm_5_lag3_terms <- c(
  "Actual Evapotranspiration (Lag = 3)",
  "Precipitation (Lag = 3)", 
  "Min Temperature (Lag = 3)"
)
df_tvcm_5_lag3 <- get_tvcoef(
  plot_vivax_tvcm_5_lag3, 
  plot_falciparum_tvcm_5_lag3, 
  tvcm_5_lag3_terms
)
plot_tvcm_by_terms(df_tvcm_5_lag3)

6 months

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_5_lag6 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) + 
    s(district, bs = "re") +
    s(time, by = aet_lag6,   bs = "tp", k = 50) +
    s(time, by = prcp_lag6,  bs = "tp", k = 30) +
    s(time, by = tmin_lag6,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.06 sec elapsed
cat("\nConverged?", vivax_tvcm_5_lag6$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5_lag6)
## 
## Family: Negative Binomial(0.851) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re") + s(time, 
##     by = aet_lag6, bs = "tp", k = 50) + s(time, by = prcp_lag6, 
##     bs = "tp", k = 30) + s(time, by = tmin_lag6, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.1630     0.3346  -21.41   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                     edf Ref.df       F p-value    
## s(district)       47.84  49.00 302.781 < 2e-16 ***
## s(time):aet_lag6  14.79  17.67   7.402 < 2e-16 ***
## s(time):prcp_lag6  2.00   2.00   5.264 0.00519 ** 
## s(time):tmin_lag6 64.98  77.74   8.105 < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.507   Deviance explained = 68.2%
## fREML =  14810  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_5_lag6, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -5.321594e-07  3.465836e-07 -1.043507e-04  4.475891e-06
## 
## $hess
##               [,1]          [,2]          [,3]          [,4]
## [1,]  2.351450e+01 -3.703506e-02  5.273644e-07 -2.871711e-02
## [2,] -3.703506e-02  2.875807e+00 -1.588277e-07  4.874460e-01
## [3,]  5.273644e-07 -1.588277e-07  1.043293e-04 -4.184334e-06
## [4,] -2.871711e-02  4.874460e-01 -4.184334e-06  2.361061e+01
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                      k'   edf k-index p-value
## s(district)        49.0  47.8      NA      NA
## s(time):aet_lag6   50.0  14.8    0.87    0.46
## s(time):prcp_lag6  30.0   2.0    0.87    0.54
## s(time):tmin_lag6 100.0  65.0    0.87    0.42
mgcv::plot.gam(
  x          = vivax_tvcm_5_lag6, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_5_lag6)
##          para s(district) s(time):aet_lag6 s(time):prcp_lag6 s(time):tmin_lag6
## worst       1  1.00000000        0.9814022         0.9612554         0.9909056
## observed    1  0.13611775        0.9497681         0.8720685         0.9500013
## estimate    1  0.04413469        0.9623054         0.8983731         0.9677595
mgcv::concurvity(vivax_tvcm_5_lag6, full = FALSE)$estimate
##                        para s(district) s(time):aet_lag6 s(time):prcp_lag6
## para              1.0000000  0.02040816        0.3282686         0.2932652
## s(district)       1.0000000  1.00000000        0.3294415         0.3049702
## s(time):aet_lag6  0.9663219  0.01990864        1.0000000         0.8077621
## s(time):prcp_lag6 0.8050018  0.01732512        0.7312992         1.0000000
## s(time):tmin_lag6 0.9689491  0.02028989        0.9507838         0.8760512
##                   s(time):tmin_lag6
## para                      0.3316525
## s(district)               0.3377419
## s(time):aet_lag6          0.9400951
## s(time):prcp_lag6         0.8355413
## s(time):tmin_lag6         1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_5_lag6 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) + 
    s(district, bs = "re") +
    s(time, by = aet_lag6,   bs = "tp", k = 50) +
    s(time, by = prcp_lag6,  bs = "tp", k = 30) +
    s(time, by = tmin_lag6,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.16 sec elapsed
cat("\nConverged?", falciparum_tvcm_5_lag6$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_5_lag6)
## 
## Family: Negative Binomial(0.51) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re") + s(time, 
##     by = aet_lag6, bs = "tp", k = 50) + s(time, by = prcp_lag6, 
##     bs = "tp", k = 30) + s(time, by = tmin_lag6, bs = "tp", k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.7259     0.4014  -21.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                     edf Ref.df       F  p-value    
## s(district)       47.58  48.00 190.268  < 2e-16 ***
## s(time):aet_lag6  11.22  13.41  11.087  < 2e-16 ***
## s(time):prcp_lag6  2.00   2.00  10.164 3.94e-05 ***
## s(time):tmin_lag6 62.16  74.84   5.632  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.368   Deviance explained = 65.2%
## fREML =  13772  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_5_lag6, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1]  1.306886e-07 -1.588761e-06 -7.249868e-05 -4.170213e-06
## 
## $hess
##               [,1]          [,2]          [,3]          [,4]
## [1,]  2.287493e+01 -7.612699e-03 -1.388942e-07 -1.208812e-01
## [2,] -7.612699e-03  1.100414e+00  1.395135e-06 -1.687119e-01
## [3,] -1.388942e-07  1.395135e-06  7.248513e-05  3.602796e-06
## [4,] -1.208812e-01 -1.687119e-01  3.602796e-06  1.871457e+01
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                      k'   edf k-index p-value
## s(district)        49.0  47.6      NA      NA
## s(time):aet_lag6   50.0  11.2    0.82    0.56
## s(time):prcp_lag6  30.0   2.0    0.82    0.49
## s(time):tmin_lag6 100.0  62.2    0.82    0.48
mgcv::plot.gam(
  x          = falciparum_tvcm_5_lag6, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_5_lag6)
##          para s(district) s(time):aet_lag6 s(time):prcp_lag6 s(time):tmin_lag6
## worst       1  1.00000000        0.9814022         0.9612554         0.9909056
## observed    1  0.09867811        0.9504558         0.8783491         0.9574366
## estimate    1  0.04413469        0.9623489         0.8983495         0.9677900
mgcv::concurvity(falciparum_tvcm_5_lag6, full = FALSE)$estimate
##                        para s(district) s(time):aet_lag6 s(time):prcp_lag6
## para              1.0000000  0.02040816        0.3302841         0.2957132
## s(district)       1.0000000  1.00000000        0.3314647         0.3075541
## s(time):aet_lag6  0.9663219  0.01990864        1.0000000         0.8074452
## s(time):prcp_lag6 0.8050018  0.01732512        0.7311948         1.0000000
## s(time):tmin_lag6 0.9689491  0.02028989        0.9507513         0.8758679
##                   s(time):tmin_lag6
## para                      0.3330126
## s(district)               0.3391256
## s(time):aet_lag6          0.9400653
## s(time):prcp_lag6         0.8354854
## s(time):tmin_lag6         1.0000000

Plots

tvcm_5_lag6_terms <- c(
  "Actual Evapotranspiration (Lag = 6)",
  "Precipitation (Lag = 6)", 
  "Min Temperature (Lag = 6)"
)
df_tvcm_5_lag6 <- get_tvcoef(
  plot_vivax_tvcm_5_lag6, 
  plot_falciparum_tvcm_5_lag6, 
  tvcm_5_lag6_terms
)
plot_tvcm_by_terms(df_tvcm_5_lag6)

12 months

P. vivax

tictoc::tic("GAM model fitting")
vivax_tvcm_5_lag12 <- mgcv::bam(
  formula = 
    vivax ~ 
    offset(ln_pop2015) + 
    s(district, bs = "re") +
    s(time, by = aet_lag12,   bs = "tp", k = 50) +
    s(time, by = prcp_lag12,  bs = "tp", k = 30) +
    s(time, by = tmin_lag12,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.99 sec elapsed
cat("\nConverged?", vivax_tvcm_5_lag12$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5_lag12)
## 
## Family: Negative Binomial(0.837) 
## Link function: log 
## 
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re") + s(time, 
##     by = aet_lag12, bs = "tp", k = 50) + s(time, by = prcp_lag12, 
##     bs = "tp", k = 30) + s(time, by = tmin_lag12, bs = "tp", 
##     k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.6713     0.3347  -19.93   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                       edf Ref.df       F  p-value    
## s(district)        47.843 49.000 298.096  < 2e-16 ***
## s(time):aet_lag12   4.549  5.329   3.133 0.006480 ** 
## s(time):prcp_lag12  4.390  5.208   4.993 0.000128 ***
## s(time):tmin_lag12 67.194 79.923   9.681  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.495   Deviance explained = 67.7%
## fREML =  14792  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_5_lag12, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 4.187093e-09 3.229654e-08 2.757584e-08 2.976799e-07
## 
## $hess
##               [,1]          [,2]         [,3]        [,4]
## [1,] 23.5132411919  0.0003037737 -0.004400675 -0.06419612
## [2,]  0.0003037737  0.5908829528  0.029270943 -0.09181087
## [3,] -0.0044006747  0.0292709425  0.639478867 -0.23036588
## [4,] -0.0641961208 -0.0918108674 -0.230365875 18.74634005
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                        k'    edf k-index p-value  
## s(district)         49.00  47.84      NA      NA  
## s(time):aet_lag12   50.00   4.55    0.85   0.065 .
## s(time):prcp_lag12  30.00   4.39    0.85   0.095 .
## s(time):tmin_lag12 100.00  67.19    0.85   0.095 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mgcv::plot.gam(
  x          = vivax_tvcm_5_lag12, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(vivax_tvcm_5_lag12)
##          para s(district) s(time):aet_lag12 s(time):prcp_lag12
## worst       1  1.00000000         0.9812527          0.9509801
## observed    1  0.14363243         0.9687927          0.9029249
## estimate    1  0.04433167         0.9631260          0.9004555
##          s(time):tmin_lag12
## worst             0.9908588
## observed          0.9372290
## estimate          0.9673412
mgcv::concurvity(vivax_tvcm_5_lag12, full = FALSE)$estimate
##                         para s(district) s(time):aet_lag12 s(time):prcp_lag12
## para               1.0000000  0.02040816         0.3223619          0.2949887
## s(district)        1.0000000  1.00000000         0.3235868          0.3075754
## s(time):aet_lag12  0.9661201  0.01991534         1.0000000          0.8024519
## s(time):prcp_lag12 0.8032460  0.01731527         0.7229075          1.0000000
## s(time):tmin_lag12 0.9690735  0.02028380         0.9520233          0.8768135
##                    s(time):tmin_lag12
## para                        0.3285054
## s(district)                 0.3343234
## s(time):aet_lag12           0.9396888
## s(time):prcp_lag12          0.8398721
## s(time):tmin_lag12          1.0000000

P. falciparum

tictoc::tic("GAM model fitting")
falciparum_tvcm_5_lag12 <- mgcv::bam(
  formula = 
    falciparum ~ 
    offset(ln_pop2015) + 
    s(district, bs = "re") +
    s(time, by = aet_lag12,   bs = "tp", k = 50) +
    s(time, by = prcp_lag12,  bs = "tp", k = 30) +
    s(time, by = tmin_lag12,  bs = "tp", k = 100),
  family   = nb(),
  data     = dat,
  method   = "fREML",
  discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.29 sec elapsed
cat("\nConverged?", falciparum_tvcm_5_lag12$converged, "\n")
## 
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_5_lag12)
## 
## Family: Negative Binomial(0.5) 
## Link function: log 
## 
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re") + s(time, 
##     by = aet_lag12, bs = "tp", k = 50) + s(time, by = prcp_lag12, 
##     bs = "tp", k = 30) + s(time, by = tmin_lag12, bs = "tp", 
##     k = 100)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.8236     0.3972   -19.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                       edf Ref.df       F  p-value    
## s(district)        47.579 48.000 187.037  < 2e-16 ***
## s(time):aet_lag12   5.718  6.721  13.743  < 2e-16 ***
## s(time):prcp_lag12  7.341  8.829   4.212 3.06e-05 ***
## s(time):tmin_lag12 52.717 64.372   4.839  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.379   Deviance explained = 64.7%
## fREML =  13741  Scale est. = 1         n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_5_lag12, pch = 19, cex = 0.3)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] 6.931163e-09 2.483024e-08 2.627335e-08 1.651579e-07
## 
## $hess
##               [,1]         [,2]          [,3]        [,4]
## [1,] 22.8592386407 -0.002396716 -0.0003409251 -0.06632696
## [2,] -0.0023967157  0.645925425  0.0063204595 -0.01323179
## [3,] -0.0003409251  0.006320460  1.0481055181  0.04622888
## [4,] -0.0663269569 -0.013231789  0.0462288764 10.17568908
## 
## Model rank =  230 / 230 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                        k'    edf k-index p-value
## s(district)         49.00  47.58      NA      NA
## s(time):aet_lag12   50.00   5.72    0.83    0.55
## s(time):prcp_lag12  30.00   7.34    0.83    0.57
## s(time):tmin_lag12 100.00  52.72    0.83    0.64
mgcv::plot.gam(
  x          = falciparum_tvcm_5_lag12, 
  n          = 500, 
  seWithMean = TRUE, 
  scale      = 0, 
  pages      = 1
)

mgcv::concurvity(falciparum_tvcm_5_lag12)
##          para s(district) s(time):aet_lag12 s(time):prcp_lag12
## worst       1  1.00000000         0.9812527          0.9509801
## observed    1  0.10756223         0.9657721          0.8938823
## estimate    1  0.04433167         0.9631651          0.9004515
##          s(time):tmin_lag12
## worst             0.9908588
## observed          0.9478304
## estimate          0.9673760
mgcv::concurvity(falciparum_tvcm_5_lag12, full = FALSE)$estimate
##                         para s(district) s(time):aet_lag12 s(time):prcp_lag12
## para               1.0000000  0.02040816         0.3242936          0.2974126
## s(district)        1.0000000  1.00000000         0.3255266          0.3101337
## s(time):aet_lag12  0.9661201  0.01991534         1.0000000          0.8022129
## s(time):prcp_lag12 0.8032460  0.01731527         0.7228119          1.0000000
## s(time):tmin_lag12 0.9690735  0.02028380         0.9519893          0.8766634
##                    s(time):tmin_lag12
## para                        0.3299275
## s(district)                 0.3357694
## s(time):aet_lag12           0.9396622
## s(time):prcp_lag12          0.8398213
## s(time):tmin_lag12          1.0000000

Plots

tvcm_5_lag12_terms <- c(
  "Actual Evapotranspiration (Lag = 12)",
  "Precipitation (Lag = 12)", 
  "Min Temperature (Lag = 12)"
)
df_tvcm_5_lag12 <- get_tvcoef(
  plot_vivax_tvcm_5_lag12, 
  plot_falciparum_tvcm_5_lag12, 
  tvcm_5_lag12_terms
)
plot_tvcm_by_terms(df_tvcm_5_lag12)

df_malaria_loreto %>% 
  group_by(district) %>% 
  summarise(
    vivax_zeros = 100 * (sum(vivax < 1)/n()),
    falciparum_zeros = 100 * (sum(falciparum < 1)/n())
  ) %>% 
  arrange(desc(vivax_zeros))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 49 x 3
##    district       vivax_zeros falciparum_zeros
##    <fct>                <dbl>            <dbl>
##  1 PADRE MARQUEZ         97.7             99.1
##  2 INAHUAYA              96.8             99.5
##  3 PAMPA HERMOSA         94.0             99.5
##  4 VARGAS GUERRA         91.2             98.1
##  5 PUTUMAYO              83.3             84.7
##  6 CAPELO                75.5             97.7
##  7 PUINAHUA              72.7             94.9
##  8 SARAYACU              70.8             93.5
##  9 CONTAMANA             69.4             94.9
## 10 JENARO HERRERA        65.3             95.8
## # ... with 39 more rows

Compare models

Lag 0

tvcm_lag0_terms <- c(
  "Actual Evapotranspiration (lag = 0)",
  "Precipitation (lag = 0)",
  "Min Temperature (lag = 0)"
)
df_tvcm_lag0 <- get_tvcoef(
  plot_vivax_tvcm_lag0, 
  plot_falciparum_tvcm_lag0, 
  tvcm_lag0_terms
)

Lag 1

tvcm_lag1_terms <- c(
  "Actual Evapotranspiration (lag = 1)",
  "Precipitation (lag = 1)",
  "Min Temperature (lag = 1)"
)
df_tvcm_lag1 <- get_tvcoef(
  plot_vivax_tvcm_lag1, 
  plot_falciparum_tvcm_lag1, 
  tvcm_lag1_terms
)

Lag 3

tvcm_lag3_terms <- c(
  "Actual Evapotranspiration (lag = 3)",
  "Precipitation (lag = 3)",
  "Min Temperature (lag = 3)"
)
df_tvcm_lag3 <- get_tvcoef(
  plot_vivax_tvcm_lag3, 
  plot_falciparum_tvcm_lag3, 
  tvcm_lag3_terms
)

Lag 6

tvcm_lag6_terms <- c(
  "Actual Evapotranspiration (lag = 6)",
  "Precipitation (lag = 6)",
  "Min Temperature (lag = 6)"
)
df_tvcm_lag6 <- get_tvcoef(
  plot_vivax_tvcm_lag6, 
  plot_falciparum_tvcm_lag6, 
  tvcm_lag6_terms
)

Lag 12

tvcm_lag12_terms <- c(
  "Actual Evapotranspiration (lag = 12)",
  "Precipitation (lag = 12)",
  "Min Temperature (lag = 12)"
)
df_tvcm_lag12 <- get_tvcoef(
  plot_vivax_tvcm_lag12, 
  plot_falciparum_tvcm_lag12, 
  tvcm_lag12_terms
)

Figure 4

df_tvcm_lagged <- rbind(
  df_tvcm_lag0,
  df_tvcm_lag1,
  df_tvcm_lag3,
  df_tvcm_lag6,
  df_tvcm_lag12
)
df_tvcm_lagged
## # A tibble: 15,000 x 5
##    x                      se fit[,1] term                                specie 
##    <dttm>              <dbl>   <dbl> <fct>                               <fct>  
##  1 2001-01-01 00:00:00 0.939  -0.883 Actual Evapotranspiration (lag = 0) P. viv~
##  2 2001-01-13 09:08:17 0.924  -0.871 Actual Evapotranspiration (lag = 0) P. viv~
##  3 2001-01-25 18:16:35 0.909  -0.859 Actual Evapotranspiration (lag = 0) P. viv~
##  4 2001-02-07 03:24:53 0.895  -0.847 Actual Evapotranspiration (lag = 0) P. viv~
##  5 2001-02-19 12:33:11 0.881  -0.835 Actual Evapotranspiration (lag = 0) P. viv~
##  6 2001-03-03 21:41:28 0.867  -0.823 Actual Evapotranspiration (lag = 0) P. viv~
##  7 2001-03-16 06:49:46 0.853  -0.811 Actual Evapotranspiration (lag = 0) P. viv~
##  8 2001-03-28 15:58:04 0.840  -0.799 Actual Evapotranspiration (lag = 0) P. viv~
##  9 2001-04-10 01:06:22 0.827  -0.787 Actual Evapotranspiration (lag = 0) P. viv~
## 10 2001-04-22 10:14:40 0.814  -0.775 Actual Evapotranspiration (lag = 0) P. viv~
## # ... with 14,990 more rows
df_tvcm_lagged$variable <- factor(
  stringr::str_extract(df_tvcm_lagged$term, ".*(?=\\s\\()"),
  levels = c("Actual Evapotranspiration", "Precipitation", "Min Temperature")
)

df_tvcm_lagged$lag <- factor(
  stringr::str_extract(df_tvcm_lagged$term, "(?<=\\().*(?=\\))"),
  levels = c("lag = 0", "lag = 1", "lag = 3", "lag = 6", "lag = 12")
)
df_tvcm_lagged %>% 
  ggplot2::ggplot() +
  facet_grid(cols = vars(specie), rows = vars(variable), scales = "free") +
  geom_line(mapping = aes(x = x, y = fit, color = lag), size = 1) +
  geom_ribbon(
    mapping = aes(
      x    = x,
      y    = fit,
      ymin = fit + se,
      ymax = fit - se,
      fill = lag
    ),
    alpha = 0.1
  ) +
  geom_rect(
    data    = df_pamafro_period,
    mapping = aes(xmin = start, xmax = end), 
    ymin    = -60, 
    ymax    = 60, 
    alpha   = 0.2
  ) +
  geom_hline(mapping  = aes(yintercept = 0), alpha = 0.6) +
  geom_vline(
    xintercept = c(df_pamafro_period$start, df_pamafro_period$end),
    linetype   = 'dashed',
    alpha      = 0.8
  ) +
  scale_x_datetime(date_labels = "%Y", date_breaks = "2 year") +
  ggsci::scale_color_npg() +
  ggsci::scale_fill_npg() +
  ylab("b(t)") +
  xlab(NULL) +
  theme(legend.position = "top", legend.title = element_blank())

AIC(
  vivax_tvcm_5,
  vivax_tvcm_5_lag1,
  vivax_tvcm_5_lag3,
  vivax_tvcm_5_lag6,
  vivax_tvcm_5_lag12
) -> vivax_tvcm_5_aic

Vivax (AIC: 70783.24, DF: 130.3846) (AIC: 70689.94, DF: 133.4880)

vivax_tvcm_5_aic
##                          df      AIC
## vivax_tvcm_5       130.3846 70783.24
## vivax_tvcm_5_lag1  128.3019 70755.05
## vivax_tvcm_5_lag3  137.4845 70763.09
## vivax_tvcm_5_lag6  133.4880 70689.94
## vivax_tvcm_5_lag12 128.8622 70807.74
readr::write_csv(vivax_tvcm_5_aic, "eval_vivax_lagged_tvcm.csv")
AIC(
  falciparum_tvcm_5,
  falciparum_tvcm_5_lag1,
  falciparum_tvcm_5_lag3,
  falciparum_tvcm_5_lag6,
  falciparum_tvcm_5_lag12
) -> falciparum_tvcm_5_aic

Falciparum (AIC: 46213.92, DF: 122.0533) (AIC: 46137.46, DF: 127.7910)

falciparum_tvcm_5_aic
##                               df      AIC
## falciparum_tvcm_5       122.0533 46213.92
## falciparum_tvcm_5_lag1  124.5639 46174.96
## falciparum_tvcm_5_lag3  123.0266 46216.87
## falciparum_tvcm_5_lag6  127.7910 46137.46
## falciparum_tvcm_5_lag12 118.5578 46221.94
readr::write_csv(falciparum_tvcm_5_aic, "eval_falciparum_lagged_tvcm.csv")